{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# XGBoost Parameter Tuning for Rental Listing Inquiries\n",
    "\n",
    "Rental Listing Inquiries数据集是Kaggle平台上的一个分类竞赛任务，需要根据公寓的特征来预测其受欢迎程度（用户感兴趣程度分为高、中、低三类）。其中房屋的特征x共有14维，响应值y为用户对该公寓的感兴趣程度。评价标准为logloss。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 一、 直接调用xgboost内嵌的cv寻找最佳的参数n_estimators"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "首先 import 必要的模块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from xgboost import XGBClassifier    #在sklearn框架下调用xgboost\n",
    "import xgboost as xgb      #单独调用xgboost\n",
    "\n",
    "import pandas as pd \n",
    "import numpy as np\n",
    "\n",
    "from sklearn.metrics import log_loss\n",
    "\n",
    "from matplotlib import pyplot\n",
    "import seaborn as sns\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 读取数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "      <th>price</th>\n",
       "      <th>price_bathrooms</th>\n",
       "      <th>price_bedrooms</th>\n",
       "      <th>room_diff</th>\n",
       "      <th>room_num</th>\n",
       "      <th>Year</th>\n",
       "      <th>...</th>\n",
       "      <th>walk</th>\n",
       "      <th>walls</th>\n",
       "      <th>war</th>\n",
       "      <th>washer</th>\n",
       "      <th>water</th>\n",
       "      <th>wheelchair</th>\n",
       "      <th>wifi</th>\n",
       "      <th>windows</th>\n",
       "      <th>work</th>\n",
       "      <th>interest_level</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.5</td>\n",
       "      <td>3</td>\n",
       "      <td>40.7145</td>\n",
       "      <td>-73.9425</td>\n",
       "      <td>3000</td>\n",
       "      <td>1200.0</td>\n",
       "      <td>750.000000</td>\n",
       "      <td>-1.5</td>\n",
       "      <td>4.5</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>40.7947</td>\n",
       "      <td>-73.9667</td>\n",
       "      <td>5465</td>\n",
       "      <td>2732.5</td>\n",
       "      <td>1821.666667</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>40.7388</td>\n",
       "      <td>-74.0018</td>\n",
       "      <td>2850</td>\n",
       "      <td>1425.0</td>\n",
       "      <td>1425.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>40.7539</td>\n",
       "      <td>-73.9677</td>\n",
       "      <td>3275</td>\n",
       "      <td>1637.5</td>\n",
       "      <td>1637.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.0</td>\n",
       "      <td>4</td>\n",
       "      <td>40.8241</td>\n",
       "      <td>-73.9493</td>\n",
       "      <td>3350</td>\n",
       "      <td>1675.0</td>\n",
       "      <td>670.000000</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>2016</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 225 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   bathrooms  bedrooms  latitude  longitude  price  price_bathrooms  \\\n",
       "0        1.5         3   40.7145   -73.9425   3000           1200.0   \n",
       "1        1.0         2   40.7947   -73.9667   5465           2732.5   \n",
       "2        1.0         1   40.7388   -74.0018   2850           1425.0   \n",
       "3        1.0         1   40.7539   -73.9677   3275           1637.5   \n",
       "4        1.0         4   40.8241   -73.9493   3350           1675.0   \n",
       "\n",
       "   price_bedrooms  room_diff  room_num  Year       ...        walk  walls  \\\n",
       "0      750.000000       -1.5       4.5  2016       ...           0      0   \n",
       "1     1821.666667       -1.0       3.0  2016       ...           0      0   \n",
       "2     1425.000000        0.0       2.0  2016       ...           0      0   \n",
       "3     1637.500000        0.0       2.0  2016       ...           0      0   \n",
       "4      670.000000       -3.0       5.0  2016       ...           0      0   \n",
       "\n",
       "   war  washer  water  wheelchair  wifi  windows  work  interest_level  \n",
       "0    0       0      0           0     0        0     0               1  \n",
       "1    0       0      0           0     0        0     0               2  \n",
       "2    0       0      0           0     0        0     0               0  \n",
       "3    0       0      0           0     0        0     0               2  \n",
       "4    1       0      0           0     0        0     0               2  \n",
       "\n",
       "[5 rows x 225 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the feature-engineered training set; the CSV is read from the notebook's working directory.\n",
    "train = pd.read_csv(filepath_or_buffer=\"RentListingInquries_FE_train.csv\")\n",
    "# Preview the first five rows as the cell's rich output.\n",
    "train.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 49352 entries, 0 to 49351\n",
      "Columns: 225 entries, bathrooms to interest_level\n",
      "dtypes: float64(7), int64(218)\n",
      "memory usage: 84.7 MB\n"
     ]
    }
   ],
   "source": [
    "# Summarize the loaded frame: row count, column dtypes and memory usage.\n",
    "train.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 准备数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Separate the target column from the features.\n",
    "# `train` is rebound without the target below, so an unguarded second run of this cell\n",
    "# would raise KeyError; the check keeps the cell safe to re-run.\n",
    "if 'interest_level' in train.columns:\n",
    "    y_train = train['interest_level']\n",
    "    train = train.drop(['interest_level'], axis=1)\n",
    "X_train = train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_split.py:2026: FutureWarning: From version 0.21, test_size will always complement train_size unless both are specified.\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "# The training set has tens of thousands of rows, so k-fold cross-validation is slow;\n",
    "# a single hold-out split from train_test_split is used to estimate model performance instead.\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Both sizes are given explicitly. With only train_size, scikit-learn < 0.21 falls back to its\n",
    "# default test_size (and emits a FutureWarning) while 0.21+ uses the complement, so the size of\n",
    "# the validation set would otherwise depend on the installed version.\n",
    "X_train_part, X_val, y_train_part, y_val = train_test_split(\n",
    "    X_train, y_train, train_size=0.33, test_size=0.67, random_state=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "默认参数，此时学习率为0.1，比较大，观察弱分类器数目的大致范围"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use xgboost's built-in cross-validation (xgb.cv) to choose n_estimators: one run scores every\n",
    "# boosting round, whereas GridSearchCV can only evaluate a finite list of candidate values.\n",
    "def modelfit(alg, X_train, y_train, cv_folds=5, early_stopping_rounds=10):\n",
    "    \"\"\"Pick n_estimators with xgb.cv, then refit `alg` on the full training data.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    alg : XGBClassifier\n",
    "        Estimator whose parameters seed the CV run. Its current n_estimators is the upper\n",
    "        bound on boosting rounds and is overwritten with the selected value.\n",
    "    X_train, y_train\n",
    "        Training features and labels; passed to xgb.DMatrix and to alg.fit.\n",
    "    cv_folds : int, or a splitter object / list of index pairs\n",
    "        An int is used as the number of stratified folds (xgb.cv `nfold`). Any other value\n",
    "        is forwarded unchanged as xgb.cv's `folds` argument.\n",
    "    early_stopping_rounds : int\n",
    "        Forwarded to xgb.cv.\n",
    "\n",
    "    Side effects\n",
    "    ------------\n",
    "    Writes the per-round CV table to '1_nestimators.csv' and mutates `alg` in place\n",
    "    (set_params followed by fit). Returns None.\n",
    "    \"\"\"\n",
    "    xgb_param = alg.get_xgb_params()\n",
    "    # Three interest levels (labels 0, 1, 2); the native multi-class objective needs the count.\n",
    "    xgb_param['num_class'] = 3\n",
    "\n",
    "    # Call the native xgboost API directly rather than the sklearn wrapper class.\n",
    "    xgtrain = xgb.DMatrix(X_train, label=y_train)\n",
    "\n",
    "    # xgb.cv's `folds` expects a splitter object or explicit index pairs, not a fold count,\n",
    "    # so a plain integer has to be passed through `nfold` instead.\n",
    "    if isinstance(cv_folds, int):\n",
    "        fold_kwargs = {'nfold': cv_folds, 'stratified': True}\n",
    "    else:\n",
    "        fold_kwargs = {'folds': cv_folds}\n",
    "\n",
    "    cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'],\n",
    "                      metrics='mlogloss', early_stopping_rounds=early_stopping_rounds, **fold_kwargs)\n",
    "\n",
    "    cvresult.to_csv('1_nestimators.csv', index_label='n_estimators')\n",
    "\n",
    "    # The number of rows in the CV table is taken as the best n_estimators.\n",
    "    n_estimators = cvresult.shape[0]\n",
    "\n",
    "    # Retrain on the full training data with the selected n_estimators.\n",
    "    alg.set_params(n_estimators=n_estimators)\n",
    "    alg.fit(X_train, y_train, eval_metric='mlogloss')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Baseline classifier for the n_estimators search. Keyword arguments are grouped by role;\n",
    "# the values themselves are unchanged.\n",
    "xgb1 = XGBClassifier(\n",
    "    objective='multi:softprob',\n",
    "    seed=3,\n",
    "    # Boosting schedule: a large n_estimators is fine because it is only an upper bound;\n",
    "    # modelfit replaces it with the round count chosen by cross-validation.\n",
    "    learning_rate=0.1,\n",
    "    n_estimators=1000,\n",
    "    # Tree shape and split regularization.\n",
    "    max_depth=6,\n",
    "    min_child_weight=1,\n",
    "    gamma=0,\n",
    "    # Row and column subsampling.\n",
    "    subsample=0.5,\n",
    "    colsample_bytree=0.8,\n",
    "    colsample_bylevel=0.7,\n",
    ")\n",
    "\n",
    "# Run the CV search and refit xgb1 with the selected n_estimators (xgb1 is updated in place).\n",
    "modelfit(xgb1, X_train, y_train, cv_folds=5, early_stopping_rounds=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'base_score': 0.5,\n",
       " 'booster': 'gbtree',\n",
       " 'colsample_bylevel': 0.7,\n",
       " 'colsample_bytree': 0.8,\n",
       " 'gamma': 0,\n",
       " 'learning_rate': 0.1,\n",
       " 'max_delta_step': 0,\n",
       " 'max_depth': 6,\n",
       " 'min_child_weight': 1,\n",
       " 'missing': None,\n",
       " 'n_estimators': 231,\n",
       " 'nthread': 1,\n",
       " 'objective': 'multi:softprob',\n",
       " 'reg_alpha': 0,\n",
       " 'reg_lambda': 1,\n",
       " 'scale_pos_weight': 1,\n",
       " 'seed': 3,\n",
       " 'silent': 1,\n",
       " 'subsample': 0.5}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the booster parameters held by xgb1; modelfit has already called set_params on n_estimators.\n",
    "xgb1.get_xgb_params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: from_csv is deprecated. Please use read_csv(...) instead. Note that some of the default arguments are different, so please refer to the documentation for from_csv when changing your function calls\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xl8XXWd//HX597sTdKkbbqvlFJaoBSoFJClCDrAKLgwSBFHHEfAn4zKuPxw9KfIuM2M6DguKDKIooIoqAgoIqvIImXvQhe6pvuWZmn2+/n98T03uUmTNi29OUnu+/l4nEfuWe65n3tyc98553vO95i7IyIiApCIuwARERk4FAoiItJBoSAiIh0UCiIi0kGhICIiHRQKIiLSQaEgksHM/s3Mbom7DpG4KBQGGTMrNbO1ZnZZxrQyM1tvZhdnTJtnZveZ2W4zqzGzpWb2FTOrjOZfYWbtZlYfDavN7CNZrn2BmVVn8zUORk/1uPtX3f2fs/R6a83s3GysOxv66/c12LbLUKdQGGTcvR64Evi2mVVFk/8TWOTuvwYws9OAx4C/Ake7ewVwHtAGHJ+xuqfdvdTdS4GLgf80sxP6553IwTCzvLhrkBzh7hoG4QDcBtwBLAB2AuMy5j0JfOcAz78CeLLbtL8Bl2WMXwgsAWoIITMrY96saFpNtMyFGfMuAJYCdcBG4FPAMKARSAH10TC+l/f1PeD+6PnPAtP7sD2OBh4CdgHLgUsOpR7geuBn0fOmAg58ENgA7AauBt4EvBK99+9mvM504JHo97ED+DlQEc27PXqtxui1PtOHbbwW+L/RazUDedH4xui9LAfO6WFbnAJsAZIZ094FvBI9PhlYBNQCW4Fv9rJNFwDVvcwbDvwU2A6sAz4PJKJ5SeDGaBusAa6JtmNeL+taC5zby7wPA6ui3+u96c8MYMC3gG3AnmgbHdvb7zvuv9fBNMRegIZD/MVBJbA5+sP7YMb0YUA7sOAAz7+CjFCIvuhqgKOi8aOABuCtQD7wmeiPsyAaXwX8WzT+lugPcGb03M3AGRl1nhg97vVLJqOO26IvgJOjL8GfA3ce4DnDCF/aH4yec2K0XY452HroORR+ABQBbwOagN8Co4EJ0ZfSWdHyR0bbqxCoAp4A/jtj3V2+/Pa3jTOWfwmYBBQDM6P3OT6jvh4DE3gdeGvG+K+A66LHTwPvjx6XAqf0so5ef1+EQPgdUBbVsQL4UDTvasKX8sRoe/+ZQwiF6HO1I/p9FgLfAZ6I5v0d8DxQQQiIWUT/GPX2+9bQt0GHjwYpd99N+A+zBLgnY1Yl4bDglvQEM/vPqF2hwcw+n7HsKdH0esJewu3Aymjee4H73f0hd28FvkH4YjqN8J9oKfB1d29x90eA+4CF0XNbgdlmVu7uu939hYN8e/e4+9/cvY0QCnMPsPzbgbXu/mN3b4te727CIbHDUc+/u3uTu/+J8CV+h7tvc/eNwF+AEwDcfVW0vZrdfTvwTeCs/ax3f9s47X/cfYO7NxLCvjB6L/nuvtbdX+9l3XcQ/T7MrIzw3/MdGdvjSDMb5e717v7MwWwMM0tGtX/W3evcfS1hz+D90SKXAN929+roc/r1g1l/hvcBt7r7C+7eDHwWONXMpkbvoYywh2juvszdN2e8vzfy+85pCoVByswuJ/yH9mfgPzJm7SYcphiXnuDun/HQrvAbwn/Sac+4e4WHNoWxwDHAV6N54wmHBdLrSBH+S50QzdsQTUtbF80DeA/hS2idmT1uZqce5NvbkvF4LyGA9mcKMD8KuBozqyF8oYw9TPVszXjc2MN4KYCZjTazO81so5nVAj8DRu1nvfvbxmkbMuavAj5B2JvZFr3W+F7W/Qvg3WZWCLwbeMHd06/1IcJeymtm9pyZvX0/NfZkFGEPcV3GtMzf//jMurs9Phjdt0894dDchOgfke8SDjVuNbObzaw8WvSN
/r5zmkJhEDKz0YTjqR8GrgIuMbMzAdy9gXAc/t0Hs05330r47/od0aRNhC/b9Gsa4TDGxmjeJDPL/PxMjubh7s+5+0WEQyy/Be5Kv8zB1HQQNgCPRwGXHkrd/SP9XM/XonXOcfdy4HLCoY207q+3v23c43Pc/Rfufnr0PKfrPwSZyy0lfKGeD1xGCIn0vJXuvpCwPf4D+LWZDev722QH4b/xKRnTOn7/hMM3EzPmTTqIdWfqvn2GASPp/Jz9j7ufRPhn5ijg09H03n7f0gcKhcHpu8Bv3f3RaJf5M8CPov8Kicb/ycyuiwIEM5sITOtthWY2ktAYuSSadBfw92Z2jpnlA58kNHY+RQidBuAzZpZvZgsIYXKnmRWY2fvMbHh0SKSWcNgDwn/YI81s+GHaDmn3AUeZ2fujevLN7E1mNquf6ykjNCLXmNkEoi+pDFuBIzLG97eN92FmM83sLdHvuYmwl9Le07KRXwAfA84ktCmk13O5mVVFeyY10eRe12NmRZkDYU/0LuAr0enQU4B/JewZpd/Xx81sgplVEBrHDyS/2+vkRfV/0MzmRu/5q8Cz7r42+v3Oj7ZbQ7Q92g/w+5a+iLtRQ8PBDcA7Cf9BVXSb/jDwlYzx+cADhD/6GmAx8BVgZDT/CsIfS/rMm22EY86jM9bxLkKD4R7gcaKG22jeMdG0PdEy74qmFwB/JBzGqgWeA07PeN6thEMANfR+9tGXM8YXcIDG6Wi5mYQzlrZH63+E0BZxUPXQc0NzXsby1WQ04hO+CD+fsU2ej7bnS4Qv+eqMZS8C1kev9ak+bOO1dG2YnkNo+6kjNMbf19M2zFh+MuEL/P5u038W/b7rCf8EvLOX5y+I3n/34UhC29XPou29AfgCnWcf5RH2ZHcSzj66lrBnYb28ztoeXuPL0byrCY3m6fc7MZp+DuGMo3o6z/QqPdDvW8OBB4s2sIhIVpjZ+cAP3H3KAReW2OnwkYgcVmZWbGYXmFledBjti4STHGQQ0J6CDApmdgbwh57meTh7SgYIMyshHAo7mtDucT/wcXevjbUw6ROFgoiIdNDhIxER6TDoOtkaNWqUT506Ne4yREQGleeff36Hu1cdaLlBFwpTp05l0aJFcZchIjKomNm6Ay+lw0ciIpJBoSAiIh0UCiIi0kGhICIiHRQKIiLSQaEgIiIdFAoiItIhZ0Jhw669/HnpVtSth4hI73ImFO6+6fOc8ss5NDY1xl2KiMiAlTOhcMbMcZRaE7t3bD3wwiIiOSpnQiG/LHT50bBboSAi0pucCYXC4SEU9tZsj7kSEZGBK2dCYVjFaACa67bFXImIyMCVM6FQPmIMAG11O2OuRERk4MqZUCirDHsKqQaFgohIb3ImFCy/iAaKsMZdcZciIjJg5UwoANRZOXlNCgURkd7kVCg05A2noHVP3GWIiAxYWQsFM7vVzLaZ2eJe5puZ/Y+ZrTKzV8zsxGzVktaUX0FJa022X0ZEZNDK5p7CbcB5+5l/PjAjGq4EbspiLQC0FFRSmtKegohIb7IWCu7+BLC/A/gXAT/14BmgwszGZasegFTRCIZ7HamUOsUTEelJnG0KE4ANGePV0bR9mNmVZrbIzBZt3/4GrkguGUGZNVLXsPfQ1yEiMoTFGQrWw7Qe/4V395vdfZ67z6uqqjrkF0yWjgJgzy71fyQi0pM4Q6EamJQxPhHYlM0XzC8PoVC3W11diIj0JM5QuBf4x+gspFOAPe6+OZsvWFwe9jIaaxQKIiI9yeYpqXcATwMzzazazD5kZleb2dXRIg8Aq4FVwI+A/5OtWtLSneLd98yr2X4pEZFBKS9bK3b3hQeY78BHs/X6PRk+KpzcdPbEnpozREQkp65oLho+hhSG1+ueCiIiPcmpUCCZR62VkdyrUBAR6UluhQJQl6yksFndZ4uI9CTnQmFv/giGtSoURER6knOh0FI0ivJ2dYonItKTnAuF9pIqRlBDU2t73KWIiAw4ORcKidLRDLNm
duzUzXZERLrLuVDIHz4WgJodG2OuRERk4Mm5UCiqDBewNezMajdLIiKDUs6FQtnI8QA01WyJuRIRkYEn50JheFUIhfZadZ8tItJdzoVCfvkYALxBPaWKiHSXc6FAMj/q6mJH3JWIiAw4uRcKRF1dNCkURES6y8lQ2FtYRWmrQkFEpLucDIWWkjGMTO2krT0VdykiIgNKToaCl46jihp21DXFXYqIyICSk6GQXzmefGtnxzZd1SwikiknQ6FoxCQAareui7kSEZGBJSdDoWLMZAD27qyOuRIRkYElJ0OhfHQIhbYa9X8kIpIpJ0PBSsfQTgKr2xx3KSIiA0pOhgLJPPYkKihoVKd4IiKZcjMUgLr8KkqadQGbiEimrIaCmZ1nZsvNbJWZXdfD/Clm9rCZvWJmj5nZxGzWk6mpeAwVbTtw9/56SRGRAS9roWBmSeB7wPnAbGChmc3uttg3gJ+6+xzgBuBr2aqnu1TpGEazi5q9rf31kiIiA1429xROBla5+2p3bwHuBC7qtsxs4OHo8aM9zM+a5M6VVFo9G7fv7q+XFBEZ8LIZChOADRnj1dG0TC8D74kevwsoM7OR3VdkZlea2SIzW7R9+/bDUlzx/CsA2L151WFZn4jIUJDNULAepnU/gP8p4CwzexE4C9gItO3zJPeb3X2eu8+rqqo6LMVVjjsCgHpd1Swi0iEvi+uuBiZljE8Eulwt5u6bgHcDmFkp8B5335PFmjoMGz0VgNZdCgURkbRs7ik8B8wws2lmVgBcCtybuYCZjTKzdA2fBW7NYj1dWPn4cAFbrbq6EBFJy1oouHsbcA3wILAMuMvdl5jZDWZ2YbTYAmC5ma0AxgBfyVY9+0jmU5McSfFedXUhIpKWzcNHuPsDwAPdpn0h4/GvgV9ns4b9qS8aR3nD1rheXkRkwMnZK5oBWkonMDa1jdomXasgIgI5Hgo2fCJjbRcbd9bHXYqIyICQ06FQOGoqBdbO9s3r4y5FRGRAyOlQqBg3DYA9W9bEXImIyMCQ06FQOjqEQuPLd8dciYjIwJDToWCVUwFIFFfGW4iIyACR06FAQQl7kiMpadhw4GVFRHJAbocCUFcykVGtm2hpS8VdiohI7HI+FNqGT2WybaV69964SxERiV3Oh0J+1RGMtd2s37oz7lJERGKX86FQPv4oAHZVr4i5EhGR+OV8KJSOnQFA0zbdbEdEJOdDwUaEm+2kduoCNhGRnA8FSkbQmBhGYZ26uhARUSiYUVcymTGt1eotVURynkIBaB8xgyMSm1m1Tb2likhuUygAReOOZqLtYM3GbXGXIiISK4UCUD7pGABqqpfFXImISLwUCkBy9EwA2ra+FnMlIiLxUigAjDiCFAkKanStgojkNoUCQF4htUUTqGpeT31zW9zViIjERqEQaR1xJNNtE69tro27FBGR2CgUIsV71nCEbWHZxl1xlyIiEhuFQmTYuZ+h0FrZvk5nIIlI7spqKJjZeWa23MxWmdl1PcyfbGaPmtmLZvaKmV2QzXr2x8YeC0Db5sVxlSAiErushYKZJYHvAecDs4GFZja722KfB+5y9xOAS4HvZ6ueAxo1kxRJSvcsp61dd2ETkdyUzT2Fk4FV7r7a3VuAO4GLui3jQHn0eDiwKYv17F9+EfWlU5jh61m7syG2MkRE4nTAUDCz6WZWGD1eYGYfM7OKPqx7ArAhY7w6mpbpeuByM6sGHgD+pZcarjSzRWa2aPv27X146UPjY45hpq3nqtufz9priIgMZH3ZU7gbaDezI4H/BaYBv+jD86yHad5tfCFwm7tPBC4AbjezfWpy95vdfZ67z6uqqurDSx+a0klzmJzYzrnTS7L2GiIiA1lfQiHl7m3Au4D/dvdrgXF9eF41MCljfCL7Hh76EHAXgLs/DRQBo/qw7qxIjjsOgD3rXo6rBBGRWPUlFFrNbCHwAeC+aFp+H573HDDDzKaZWQGhIfnebsusB84BMLNZhFDI3vGhAxkbQqF4
xxJa2tTYLCK5py+h8EHgVOAr7r7GzKYBPzvQk6K9i2uAB4FlhLOMlpjZDWZ2YbTYJ4EPm9nLwB3AFe7e/RBT/ymfQHPhCGb5apZvqYutDBGRuOQdaAF3Xwp8DMDMKoEyd/96X1bu7g8QGpAzp32h27rffDAFZ5UZqbHHc9yaVSyqruG4icPjrkhEpF/15eyjx8ys3MxGAC8DPzazb2a/tHgUTT6JGbaRHzz0atyliIj0u74cPhru7rXAu4Efu/tJwLnZLSs+Nv4E8izF8fkbDrywiMgQ05dQyDOzccAldDY0D13jTwBgVN0yttU2xVyMiEj/6kso3EBoLH7d3Z8zsyOAldktK0bl42lNFHJ8YjV/W6seU0UktxwwFNz9V+4+x90/Eo2vdvf3ZL+0mJiRPPIcTkqs4rk1CgURyS19aWieaGa/MbNtZrbVzO42s4n9UVxcEpPnM9U2s3z1mrhLERHpV305fPRjwkVn4wl9F/0+mjZ0TZoPQPmOF9jd0BJzMSIi/acvoVDl7j9297ZouA3IXgdEA8H4uaQS+ZxoK3nq9Z1xVyMi0m/6Ego7zOxyM0tGw+XA0P6mzC/Gxs3lTcmVPLlqR9zViIj0m76Ewj8RTkfdAmwGLiZ0fTGkWe1G5tgq/rZyY9yliIj0m76cfbTe3S909yp3H+3u7yRcyDa0/f2N5NPGqJrFrNNNd0QkRxzqndf+9bBWMRBNOQ3HOCWxlMdXxNdxq4hIfzrUUOjpBjpDS3EFNm4OC4qW8/CybXFXIyLSLw41FOLr3ro/TT2D41IreP71zTQ0t8VdjYhI1vUaCmZWZ2a1PQx1hGsWhr6pp5PnLRzny3UWkojkhF5Dwd3L3L28h6HM3Q94H4YhYcqb8UQeZyZf5XO/UVfaIjL0Herho9xQVI5Nms+Fw5bR2NJOY0t73BWJiGSVQuFAjjyHCU0rKWnZyZ+Wbom7GhGRrFIoHMj0cwC4sOw17nlBF7KJyNCmUDiQsXMgkc+FLffz+IrtbKvTjXdEZOjqS9fZPZ2FtCHqTvuI/igyVokEnPiPHJe/iWKauPelTXFXJCKSNX3ZU/gm8GlCt9kTgU8BPwLuBG7NXmkDyDHvItHWyBVVK/jNizqEJCJDV19C4Tx3/6G717l7rbvfDFzg7r8EKrNc38Aw5TQoHcMpex9jyaZaXttSG3dFIiJZ0ZdQSJnZJWaWiIZLMublxpXNiSRYgjPb/0ZlXjM/f2Z93BWJiGRFX0LhfcD7gW3R8H7gcjMrBq7Z3xPN7DwzW25mq8zsuh7mf8vMXoqGFWZWcwjvoX9cfCtGik9OeZ17XqimXt1eiMgQ1Jeus1e7+zvcfVQ0vMPdV7l7o7s/2dvzzCwJfA84H5gNLDSz2d3Wfa27z3X3ucB3gHve2NvJokmnQNk43p58loaWdu55oTruikREDru+nH00MTrTaJuZbTWzu81sYh/WfTKwKgqVFkLD9EX7WX4hcEffyo5BIgGz38nwjY9zyvh8/vfJNbSncuPomYjkjr4cPvoxcC+hE7wJwO+jaQcyAdiQMV4dTduHmU0BpgGP9DL/SjNbZGaLtm+P8d4G6/6KtbfwuWkrWLdzL39YvDm+WkREsqAvoVDl7j9297ZouA2o6sPzerrnQm//Wl8K/Nrde+xcyN1vdvd57j6vqqovL50lVz0BI2dw7Lb7KMpL8Jlfv4K79hZEZOjoSyjsMLPLzSwZDZcDO/vwvGpgUsb4RKC3K78uZSAfOkozg7mXYRue5ptvLWdvSzsPLlF/SCIydPQlFP4JuATYAmwGLgY+2IfnPQfMMLNpZlZA+OK/t/tCZjaTcL3D030tOlbHLwRLcF7LQ0yvGsaNf1qhtgURGTL6cvbRene/0N2r3H20u78TeHcfntdGOGX1QWAZcJe7LzGzG8zswoxFFwJ3+mA5DlM+Do46j8SLt1NAGyu31XPXog0Hfp6IyCBgh/JdbGbr3X1yFuo5oHnz5vmiRYvieOlOrz8C
t78Lf9cPee8zU1i1rZ5HP7mA4SX58dYlItILM3ve3ecdaLlD7SW1p0bk3DFtAYycgT37Q65/+2x2NbRw7rcej7sqEZE37FBDYXAc6smWRALmXwWbXmB2yytcddYRbK9r5rHl2+KuTETkDek1FHrpMrvWzOoI1yzkthMuh2FV8OQ3ufbcoyjOT/Lhny5iz97WuCsTETlkvYaCu5e5e3kPQ5m75/VnkQNSfjHkD4PXH6Fo64vcddWpuMMX710cd2UiIodMd157Iz7yJAwbDX/6fxw3oZwx5UX89qVN3PWczkYSkcFJofBGFJbBgutg/VOw/AEe//QCTj9yFJ/77assWrsr7upERA6aQuGNOvEDkFcMv/4QebTz3ctOIGHGpTc/w8aaxrirExE5KAqFNyqZBxffCm2N8MJPqCgp4P6PnU7KnXNvfJzdDS1xVygi0mcKhcNh5vlQWA5/+L+wdxdHji7jJ/90Mu3uvP/WZ3VGkogMGgqFw8EMPvgAeAoe+XcAzphRxbSRJSzZWBuCoVHBICIDn0LhcBl7HJSOhUW3wrrQt9+D157FLR+Yx6vVezjlqw+zYdfemIsUEdk/hcLhdM3foHIq3HMlNIbbTZ8zaww//+f5tLSnOPsbj/H8Op2VJCIDl0LhcCosg/f8L+xZD9+ZB1Fng6cdOYo/XXsmyYRx8U1Pc+uTa3RzHhEZkBQKh9vEeXD252Hvdnjllx2Tp1eV8sxnz+GcWaO54b6lzL3hIXbUN8dYqIjIvhQK2XDGv4azkX77Edi6pGNy5bACfvSP87jhomOobWrllK8+zO3PrNNNekRkwDik+ynEaUDcT6EvajfDzQsgrxCufAxKRnSZvXJrHe+56Slqm9ooKUgydWQJD3z8zDgqFZEckO37KciBlI+DS38OdZvhVx+A9rYus2eMKePlL76N7152Am3tztLNdZz07w/x+vb6mAoWEVEoZNfEefCOb8OaJ+BbszsantPMjLfPGc9LX3wr44cXsbOhhXNufJx//eVLrN3REFPRIpLLdPioPzzyZXjiv2D+R+C8r4WL3Xqwo76ZHz7+Orc8uQZ3GDmsgO8sPIFTjhhJIpHbN7sTkTemr4ePFAr9wR0e/Dd45vtwxqfgLZ/vNRgAttU1ceF3nmRLbTg7qTAvQVVZIb+6+lTGDS/ur6pFZAhRKAw07nDjTKjfCqd8FN725XBbz/1oam3nj4u38IXfLaa2KbRJlBflcf2Fx3Du7DGUF+X3R+UiMgQoFAaiVAoe/Cw8+wM49mJ4502QV9Cnp67b2cBlP3qGHfUtNLelgBAQn3zbTM6ZNZqJlSXZrFxEBjmFwkDlDk9+Cx7+EhxxNrz39nAldJ+f7rywfjfX/OJFttY2kb7EIWEwbngRP7h8HsdOKMf2c3hKRHKPQmGge/HncO+/wNhj4b0/g4rJh7Sa1dvr+fOyrXzn4VXUNYdDTPlJwx3GVxRx4yVzOXb8cIoLkoezehEZZAZEKJjZecC3gSRwi7t/vYdlLgGuBxx42d0v2986h0woAKx4EO7+59DofNH3Ydbb39DqdjW08Ohr2/jKA8vYvbelyxmwJQVJSgvz+MS5R3HUmFJmjC5jeInaJERyReyhYGZJYAXwVqAaeA5Y6O5LM5aZAdwFvMXdd5vZaHfftr/1DqlQANi1Gn71Qdj8Esy/Gt56Q7gK+jDYXtfM+255hvrmNhqa29jT2PUCOgMSCWNUaQEfO2cGM0aXMWN0KZXD+tbOISKDx0AIhVOB693976LxzwK4+9cylvlPYIW739LX9Q65UABoa4Y/Xx9OWR07B959M4yeddhfJpVyNtY0smpbPV/43WI21zaRiholMrtfyksYJQVJivOTXL1gOpMqS5g0ooSJlcUMK8w77HWJSPYNhFC4GDjP3f85Gn8/MN/dr8lY5reEvYk3Ew4xXe/uf+xhXVcCVwJMnjz5pHXr1mWl5ti9dn9oZ2iug7M/B6f9CySy3xbg7mza08TKrXV8
4XeLaWxpp7G1nfrm9h6XL85Pkp809ra0U1VWyIfPOIIx5UWMLi9kdFkho8uK1IYhMsAMhFD4B+DvuoXCye7+LxnL3Ae0ApcAE4G/AMe6e01v6x2SewqZ6rfD/dfCst9DwTBY+EuYdkYspbg7Oxta2LBrLxt2N7Jh115+8tRaWttTtLSn2NvcTm+fnmTCcHfMjIrifN5z0sQQGOVFjIl+ji4r1J6HSD/payhk8y+yGpiUMT4R2NTDMs+4eyuwxsyWAzMI7Q+5qbQKLrkdltwDv/kI/OTtMPMCOPdLUHVUv5ZiZowqLWRUaSEnTK4E4KNnH9llmVTKqWlsZVtdE1trm/nSvUtoaU/R2p5iZ30LKXfqm9v40V9Wd+/6qUNBMkFe0mhuS2GEdveq0kI+cNpUyoryKSvKi4Z8yqOfZUV5lBQkdeqtyGGWzT2FPMKhoXOAjYQv+svcfUnGMucRGp8/YGajgBeBue6+s7f1Dvk9hUytjfDMTaHvJG+HN30YFlwHw0bFXdlBc3dqG9s6wmNbXRPfemgFW2qbGF6cT1vKqW1sDXseTq97ID0xC4e0kolwSMuAypICkolwRtbo8iKuOG0qJQUhSIryk6HNJGo3KS5IdrShFBckKUgmFDYy5MR++Cgq4gLgvwntBbe6+1fM7AZgkbvfa+Ev70bgPKAd+Iq737m/deZUKKTVb4fHvw7P3QKWDH0nzb8qHF4aolIpp6GljbqmNuqb26hraqW2KYzXNbVS19TGT59ay/b6ZtyhtCiP9pTT0NyGExrLU84h38AoYeEQWFvKMaC4IEnSjIaO0MknkTB2N7SAGWPLC/nwmdMpTCbIzzMKkqHdJT8vEU1LUJBMkJ9MUJCen2dhWjSvIJlQx4eSNQMiFLIhJ0MhbftyeOgLsOKPkMiDMz8NJ1+5zw18pKvW9hR7W9ppam1nb0t71JDeRmNLir0tbTS2hmk3PfY6m2ubwJ0RwwpIedjTABhWmNcRVAB5iQQpd9qyeNe8/KRhGK2pcFitKD+JGTS1tIMZZYV5mEFdUxsVJfmcO2sM+R3Bk6AgaRTkJbpNS3RMCz+th2mJjkN6yYRhBgkzkmYkzEgkovGEkZdIL6MwG+gUCkPZ+mdCVxkr/giWgJOvglMoyrhvAAAQ8ElEQVQ/ChWTDvxcOezaU94RLK3tKVraQptKc/QzjHuXad96aAWOk3LYsqcJJ7SjuDvbo3t3V5YU4O7s3tsKhL0hd6hvbsPdKc5PknJobmvv2DtyJ6tBdSDJhGF07qEV5CUwg5aov67igiSG0djazrCCJCdNqSQvGcIpL5EgL2HkJY28ZHiciIIoGQVRIpEOJzofJ7otEwVWepmOQIumJaNp6UDLTyY6Ai4v43EyGtLSuRfeYeZ40FlbCM5kNG5RPdZtPZnrInNa9/VatE0SCRLGIQewQiEXbF0Kf/02vHInYHD8Qnjzx2H00XFXJgOAR3sy6WBqyQiodHC1ZIRY1zBz2tpT4RCcO+5OKuW0e1hvei+pLVquNeX85oWNOI47HVfUDy/Ox6Gjvai0MA/3zsN8xflJnNAjsAP5yUSou9072pXS38u6lTlMHVnCY58++5Ceq1DIJTXr4envwd9uBk/BUefD6dfC5PlxVyaSNalUCKd2d1IpMh57R3tSKgqw9lQIq/ZU5zLtUfi0pZz2VAjC9lQ67MJ4+pSH9Ndk+tuyc7xzfudrRY/Tr+V0XCSa/r7ddz10mZ8pXVN7ylkws4o5EysOaXspFHJRw84QDH/5BqTaYPKp4QK4GW+DpPo5EsllCoVc1tIAL/wUHvoitDdDyUg49j0w570w4aT93vVNRIYmhYJAeyuseji0OSz5LeAwYnoIhzmXwIhpcVcoIv1EoSBdNe2BpffCg5+D5j1h2qT5IRxmv3NQXhAnIn2nUJDe7amGV38FL/8Sti8L0448NxxiOvrvoWh4vPWJyGGnUJADc4etS2Dxr+Hp74f2BywEw1HnhaAoHxd3lSJy
GAyEDvFkoDMLtwMdeyyc80WoXgSL74alv4PX7gvLjD0OjnwrzHgrTDwZkvrIiAxl2lOQfbnDtqWw8k/wl29Cc22YnkjC0e8IAXHkuVA2Nt46RaTPtKcgh84MxhwThtOvDY3Uqx8LIfHKXbD0t2G5sXPCNRBHnhtOdc3TbTxFBjvtKcjBcYctr8Kqh2Dln2H9U2F6XnG4gnrq6TD1DBh/okJCZABRQ7P0j8YaWPMErPsrrH0Sti4O09MhMeX0EBQTToS8wnhrFclhOnwk/aO4AmZfGAYIXW2sfyoExJq/wKNfDtPzimDSyVFIvBkmzIP8ovjqFpEeKRTk8Bo2Ema9IwwAe3fBuqeiPYm/wGNfDdOTBSEYpr4ZppwWzmwqLI2vbhEBFAqSbSUjYNbbwwDQuDvcD2LdX2HtX+GJ/+pcNr8Ejnk3jJ8L4+aGU2Xzi+OpWyRHKRSkfxVXwszzwwDQXAcbng1BselFeOWX8NLPOpfPDIrxJ4QzohQUIlmjUJB4FZaFU1qPPDeMu0PtRtj0UgiJzS/tGxRjjoPxx4e9ifEnRkGh9gmRw0GhIAOLGQyfGIb0IaeegmL5H+DFdFBEV2aPm9t56Gn0LCgYFtvbEBmsFAoy8PUWFHs2dA2K1+6DF2/vfN6I6aGbjnFzYNzxMPZ4KK2K5z2IDBIKBRmczKBichjSp8O6Q8062LI4dPS39dUQGOkrsAHKxmeExBwYMxsqpkIiEcvbEBloFAoydJhB5dQwpPcoIJzxtOVV2PwKbHkFNr8cuuzwVPS8BIw5NhxyqpoJVdHPyqmhvyeRHKJQkKGvuBKmnRmGtJa9odO/bctg+2thWPvX0KidllcEo46CqqNh9NHhZ9XRCgsZ0rIaCmZ2HvBtIAnc4u5f7zb/CuC/gI3RpO+6+y3ZrEkEgIISmDgvDJmaamHHiq5hsfR38OpdnctYIpzxlN6jGD1LYSFDRtZCwcySwPeAtwLVwHNmdq+7L+226C/d/Zps1SFyUIrK9x8W21/rDIz1T3cNi2RhtGcxMwzpxyOmq3NAGTSyuadwMrDK3VcDmNmdwEVA91AQGfh6C4vmOti+vGtYbPhbuJtdplEzQwP32DmdP0tG9F/9In2UzVCYAGzIGK8G5vew3HvM7ExgBXCtu2/ovoCZXQlcCTB58uQslCpyiArLeg6LlgbYsTLau1ge2i/WPRXujZ02fFJ0BtQxUZvFLBh5pPYqJFbZDAXrYVr3frp/D9zh7s1mdjXwE+At+zzJ/WbgZghdZx/uQkUOu4JhUdccc7tOb9gJW17OOBPqFVh+f+f8RF4IhlEzojOppsGIaeEQ1PBJOnVWsi6boVANTMoYnwhsylzA3XdmjP4I+I8s1iMSv2EjYfpbwpDW2gQ7V8K218IexfbXwt7Fij9Be3PncnnFMHJ6CIxRR8HIGTDiiBAaOhQlh0k2Q+E5YIaZTSOcXXQpcFnmAmY2zt03R6MXAsuyWI/IwJRfFK68Hntc1+mpFNRtgl1rYNfr0eGoleEq7iW/6bpsUUUUEN2GkdOhZGS4hkOkD7IWCu7eZmbXAA8STkm91d2XmNkNwCJ3vxf4mJldCLQBu4ArslWPyKCTSHR27zHtjK7z2pph1+ooMFZ3DtXPwZJ7Oi/MAygaHg5JdQzTw88R03UPC9mHbscpMtS0tUDN+igoXoedq6Lh9dBfVKaycfsGRuU0qJyiLsqHGN2OUyRX5RXAqCPD0F3LXti9pjModkQ/l/4OGnd1XbZsXGe3ISMy2jJGHKGuyocwhYJILikoCafAjjlm33l7d4W9id1ruw5rnoCX78hY0MKexMgoJEbN6AyMYVVqvxjkFAoiEpSMCMOkN+07r6Uh2rNY2Xn9xY6VsPZJaGvsXK5weNhDGXFEdBhqajg7qnIqlI7VKbWDgEJBRA6sYFjobnzc8V2np1JQWx2FxKpwau2OFeEWq4vv7trgnVcEFVO6BkU6ONSGMWAoFETk0CUSnfe1SN9SNa29
NTR4714b2jF2rw1nS+1eB+v+Ci31XZfvaMPotodROVWHpfqRQkFEsiOZH53+On3fee6wd2dGUKztDI7Vj4XrMzLlFUH5BKiYFJ2mO6nzdN3hk8I8NX4fFgoFEel/ZjBsVBi69xsF4SrvmvWdQbFnA+ypDsPKP0P9ln2fM2x0CImKSZ2hUTE57GlUTNE1GX2kUBCRgSe/CKqOCkNP2pqhdlMUFBu6/ty2LHQRktkADlA8ImMvY0LX0KicBsUVWX9bg4FCQUQGn7zCqKPAaT3PTx+e6rK3Ee1p7F4Tzppq3tP1OUUVPTeCj5gWDk/lyA2UFAoiMvRkHp6acGLPyzTtyWgIX9vZtrH5ZVj2e0i1dS6byA+HpXpqBK+cGrpQHyIUCiKSm4qG99wRIUCqPdqrWLvv2VMbn4emmq7LF1eGw1LpM7E6HkftG8WVg+bsKYWCiEh3iWS4dqJyCnDWvvMbd3e96rtmQ2jT2LkKXn8UWhu6Ll9QGgXFpG6BEf0cNnrAXNinUBAROVjFlWEYf8K+89xDlyF71neGRU368fpwu9buexrJgs5G8I6wiAKjYjKUjYdk/3xdKxRERA4ns3AzpWEjew4NCPf27hIY66PHG2DlQ1C/tds6k+GMqXO+CMddnNXyFQoiIv2tsAzGzA5DT1qboHYj1KzrDI2a9eHK7ixTKIiIDDT5Rb1fDZ5lA6NlQ0REBgSFgoiIdFAoiIhIB4WCiIh0UCiIiEgHhYKIiHRQKIiISAeFgoiIdDB3j7uGg2Jm24F1h/j0UcCOw1jOYKXt0EnbItB2CIbydpji7ge8JHrQhcIbYWaL3L2He//lFm2HTtoWgbZDoO2gw0ciIpJBoSAiIh1yLRRujruAAULboZO2RaDtEOT8dsipNgUREdm/XNtTEBGR/VAoiIhIh5wJBTM7z8yWm9kqM7su7nr6k5mtNbNXzewlM1sUTRthZg+Z2croZ2XcdR5uZnarmW0zs8UZ03p83xb8T/T5eMXMToyv8sOrl+1wvZltjD4TL5nZBRnzPhtth+Vm9nfxVH34mdkkM3vUzJaZ2RIz+3g0Pec+E/uTE6FgZknge8D5wGxgoZn1ch+8Ietsd5+bcQ72dcDD7j4DeDgaH2puA87rNq23930+MCMargRu6qca+8Nt7LsdAL4VfSbmuvsDANHfxaXAMdFzvh/9/QwFbcAn3X0WcArw0ej95uJnolc5EQrAycAqd1/t7i3AncBFMdcUt4uAn0SPfwK8M8ZassLdnwB2dZvc2/u+CPipB88AFWY2rn8qza5etkNvLgLudPdmd18DrCL8/Qx67r7Z3V+IHtcBy4AJ5OBnYn9yJRQmABsyxqujabnCgT+Z2fNmdmU0bYy7b4bwxwKMjq26/tXb+87Fz8g10WGRWzMOH+bEdjCzqcAJwLPoM9FFroSC9TAtl87FfbO7n0jYHf6omZ0Zd0EDUK59Rm4CpgNzgc3AjdH0Ib8dzKwUuBv4hLvX7m/RHqYNqW3Rk1wJhWpgUsb4RGBTTLX0O3ffFP3cBvyGcDhga3pXOPq5Lb4K+1Vv7zunPiPuvtXd2909BfyIzkNEQ3o7mFk+IRB+7u73RJP1mciQK6HwHDDDzKaZWQGhIe3emGvqF2Y2zMzK0o+BtwGLCe//A9FiHwB+F0+F/a63930v8I/RGSenAHvShxSGom7Hxt9F+ExA2A6XmlmhmU0jNLL+rb/rywYzM+B/gWXu/s2MWfpMZMiLu4D+4O5tZnYN8CCQBG519yUxl9VfxgC/CX8P5AG/cPc/mtlzwF1m9iFgPfAPMdaYFWZ2B7AAGGVm1cAXga/T8/t+ALiA0LC6F/hgvxecJb1shwVmNpdwOGQtcBWAuy8xs7uApYSzdT7q7u1x1J0FbwbeD7xqZi9F0/6NHPxM7I+6uRARkQ65cvhIRET6QKEgIiIdFAoiItJBoSAiIh0UCiIi0kGhICIiHRQKIn1gZnO7dS994eHqgt3MPmFmJYdjXSJv
lK5TEOkDM7sCmOfu12Rh3Wujde84iOckh9BFZTKAaE9BhhQzmxrdROVH0Y1U/mRmxb0sO93M/hj1HvsXMzs6mv4PZrbYzF42syeirlFuAN4b3ZDmvWZ2hZl9N1r+NjO7KbqBy2ozOyvqeXSZmd2W8Xo3mdmiqK4vRdM+BowHHjWzR6NpCy3cFGmxmf1HxvPrzewGM3sWONXMvm5mS6OeTr+RnS0qOcfdNWgYMgMwldA9w9xo/C7g8l6WfRiYET2eDzwSPX4VmBA9roh+XgF8N+O5HeOEm9jcSehV8yKgFjiO8E/X8xm1jIh+JoHHgDnR+FpgVPR4PKGrhSpCtySPAO+M5jlwSXpdwHI69/Yr4t72GobGoD0FGYrWuHu6b5vnCUHRRdR98mnAr6J+cH4IpDuJ+ytwm5l9mPAF3he/d3cnBMpWd3/VQw+kSzJe/xIzewF4kXBns57u/vcm4DF33+7ubcDPgXRX5+2EHj4hBE8TcIuZvZvQN4/IG5YTHeJJzmnOeNwO9HT4KAHUuPvc7jPc/Wozmw/8PfBS1HFcX18z1e31U0Be1OPop4A3ufvu6LBSUQ/r6akP/7Qmj9oRPHTyeDJwDqHX32uAt/ShTpH90p6C5CQPN1dZY2b/AB03aT8+ejzd3Z919y8AOwh96tcBZW/gJcuBBmCPmY0h3PAoLXPdzwJnmdmo6N7IC4HHu68s2tMZ7uHeyp8g3CxH5A3TnoLksvcBN5nZ54F8QrvAy8B/mdkMwn/tD0fT1gPXRYeavnawL+TuL5vZi4TDSasJh6jSbgb+YGab3f1sM/ss8Gj0+g+4e0/3uigDfmdmRdFy1x5sTSI90SmpIiLSQYePRESkgw4fyZBnZt8j3HUr07fd/cdx1CMykOnwkYiIdNDhIxER6aBQEBGRDgoFERHpoFAQEZEO/x8BEB81FXlB9wAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x2e1b409eb70>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# pd.DataFrame.from_csv is deprecated in favor of read_csv; index_col=0 restores the\n",
    "# 'n_estimators' column written by modelfit as the index.\n",
    "cvresult = pd.read_csv('1_nestimators.csv', index_col=0)\n",
    "\n",
    "# Per-round mean and standard deviation of mlogloss across the CV folds.\n",
    "test_means = cvresult['test-mlogloss-mean']\n",
    "test_stds = cvresult['test-mlogloss-std']\n",
    "\n",
    "train_means = cvresult['train-mlogloss-mean']\n",
    "train_stds = cvresult['train-mlogloss-std']\n",
    "\n",
    "x_axis = range(0, cvresult.shape[0])\n",
    "\n",
    "# Error bars show the fold-to-fold standard deviation.\n",
    "pyplot.errorbar(x_axis, test_means, yerr=test_stds, label='Test')\n",
    "pyplot.errorbar(x_axis, train_means, yerr=train_stds, label='Train')\n",
    "pyplot.title(\"XGBoost n_estimators vs Log Loss\")\n",
    "pyplot.xlabel('n_estimators')\n",
    "pyplot.ylabel('Log Loss')\n",
    "# The label= arguments above are only displayed once a legend is drawn.\n",
    "pyplot.legend()\n",
    "pyplot.savefig('n_estimators4_1.png')\n",
    "\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## 二、调整树的参数：max_depth\n",
    "(粗调，参数的步长为2；下一步是在粗调最佳参数周围，将步长降为1，进行精细调整)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "前面参数调整得到的n_estimators最优值（232），其余参数继续默认值\n",
    "\n",
    "用交叉验证评价模型性能时，用scoring参数定义评价指标。评价指标是越高越好，因此用一些损失函数当评价指标时，需要再加负号，如neg_log_loss、neg_mean_squared_error。详见sklearn文档：http://scikit-learn.org/stable/modules/model_evaluation.html#log-loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'max_depth': range(4, 10, 2)}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#max_depth 建议3-10， min_child_weight=1／sqrt(ratio_rare_event) =5.5\n",
    "max_depth = range(4,10,2)\n",
    "#min_child_weight = range(1,6,2)\n",
    "#param_test2_1 = dict(max_depth=max_depth, min_child_weight=min_child_weight)\n",
    "param_test2_1 = dict(max_depth=max_depth)\n",
    "param_test2_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.58533, std: 0.00408, params: {'max_depth': 4},\n",
       "  mean: -0.57706, std: 0.00390, params: {'max_depth': 6},\n",
       "  mean: -0.58614, std: 0.00507, params: {'max_depth': 8}],\n",
       " {'max_depth': 6},\n",
       " -0.5770643276389318)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.model_selection import StratifiedKFold     # import the required modules\n",
    "\n",
    "# Base estimator for the coarse max_depth search; the max_depth given here is overridden by the grid.\n",
    "xgb2_1 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=232,  # best n_estimators from the first tuning round\n",
    "        max_depth=6,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "# 3-fold cross-validation scored by negated log loss (higher is better).\n",
    "gsearch2_1 = GridSearchCV(xgb2_1, param_grid = param_test2_1, scoring='neg_log_loss',n_jobs=-1, cv=3)\n",
    "gsearch2_1.fit(X_train , y_train)\n",
    "\n",
    "# grid_scores_ was removed in scikit-learn 0.20; cv_results_ holds the same per-candidate scores.\n",
    "display(pd.DataFrame(gsearch2_1.cv_results_)[['params', 'mean_test_score', 'std_test_score']])\n",
    "gsearch2_1.best_params_, gsearch2_1.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 三、调整树的参数：min_child_weight\n",
    "(粗调，参数的步长为2；下一步是在粗调最佳参数周围，将步长降为1，进行精细调整)\n",
    "精细调整略\n",
    "\n",
    "一次调试两个参数太慢，每次只调整一个参数\n",
    "为了加快速度，cv=3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "前面参数调整得到的n_estimators最优值（232），max_depth=6 ，其余参数继续默认值。\n",
    "\n",
    "用交叉验证评价模型性能时，用scoring参数定义评价指标。评价指标是越高越好，因此用一些损失函数当评价指标时，需要再加负号，如neg_log_loss，neg_mean_squared_error 详见sklearn文档：http://scikit-learn.org/stable/modules/model_evaluation.html#log-loss"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "本步只对 `min_child_weight` 做粗调，候选值为 `range(1, 6, 2)`，即 1、3、5（参数字典为 `param_test3`）。\n",
    "\n",
    "经验建议：`max_depth` 取 3-10；`min_child_weight` = 1/sqrt(ratio_rare_event) = 5.5。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'min_child_weight': range(1, 6, 2)}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#max_depth 建议3-10， min_child_weight=1／sqrt(ratio_rare_event) =5.5\n",
    "#max_depth = range(4,10,2)\n",
    "min_child_weight = range(1,6,2)\n",
    "#param_test2_1 = dict(max_depth=max_depth, min_child_weight=min_child_weight)\n",
    "param_test3 = dict(min_child_weight=min_child_weight)\n",
    "param_test3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.57706, std: 0.00390, params: {'min_child_weight': 1},\n",
       "  mean: -0.57781, std: 0.00453, params: {'min_child_weight': 3},\n",
       "  mean: -0.57807, std: 0.00451, params: {'min_child_weight': 5}],\n",
       " {'min_child_weight': 1},\n",
       " -0.5770643276389318)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Base estimator for the coarse min_child_weight search; the value given here is overridden by the grid.\n",
    "xgb3 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=232,  # best n_estimators from the first tuning round\n",
    "        max_depth=6,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "# 3-fold cross-validation scored by negated log loss (higher is better).\n",
    "gsearch3= GridSearchCV(xgb3, param_grid = param_test3, scoring='neg_log_loss',n_jobs=-1, cv=3)\n",
    "gsearch3.fit(X_train , y_train)\n",
    "\n",
    "# grid_scores_ was removed in scikit-learn 0.20; cv_results_ holds the same per-candidate scores.\n",
    "display(pd.DataFrame(gsearch3.cv_results_)[['params', 'mean_test_score', 'std_test_score']])\n",
    "gsearch3.best_params_, gsearch3.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "原记录：最佳结果在min_child_weight=5，所以继续测试更大的min_child_weight。（注意：上面保存的输出显示本次运行的最佳值为min_child_weight=1，mean=-0.57706，与该记录不一致，需重新核对。）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'min_child_weight': range(7, 10, 2)}"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "min_child_weight = range(7,10,2)\n",
    "#param_test2_1 = dict(max_depth=max_depth, min_child_weight=min_child_weight)\n",
    "param_test3_2 = dict(min_child_weight=min_child_weight)\n",
    "param_test3_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.57798, std: 0.00462, params: {'min_child_weight': 7},\n",
       "  mean: -0.57797, std: 0.00479, params: {'min_child_weight': 9}],\n",
       " {'min_child_weight': 9},\n",
       " -0.5779724134633093)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Base estimator for the extended min_child_weight search; the value given here is overridden by the grid.\n",
    "xgb3_2 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=232,  # best n_estimators from the first tuning round\n",
    "        max_depth=6,\n",
    "        min_child_weight=1,\n",
    "        gamma=0,\n",
    "        subsample=0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "# 3-fold cross-validation scored by negated log loss (higher is better).\n",
    "gsearch3_2= GridSearchCV(xgb3_2, param_grid = param_test3_2, scoring='neg_log_loss',n_jobs=-1, cv=3)\n",
    "gsearch3_2.fit(X_train , y_train)\n",
    "\n",
    "# grid_scores_ was removed in scikit-learn 0.20; cv_results_ holds the same per-candidate scores.\n",
    "display(pd.DataFrame(gsearch3_2.cv_results_)[['params', 'mean_test_score', 'std_test_score']])\n",
    "gsearch3_2.best_params_, gsearch3_2.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
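    "原记录：最佳min_child_weight=7。（注意：上面保存的输出中，min_child_weight=9 的得分 -0.57797 略高于 7 的 -0.57798，且两者都低于上一步 min_child_weight=1 的 -0.57706，该结论需重新核对。）"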
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 四、调整正则化参数：reg_alpha\n",
    "\n",
    "行列采样参数略"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "前面已经调好的参数： n_estimators：232 max_depth：6 min_child_weight：7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'reg_alpha': [0.1, 1, 2]}"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reg_alpha = [ 0.1, 1, 2]    #default = 0, 测试0.1,1，1.5，2\n",
    "#reg_lambda = [0.5, 1, 2]      #default = 1，测试0.1， 0.5， 1，2\n",
    "\n",
    "param_test4 = dict(reg_alpha=reg_alpha)\n",
    "param_test4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.57739, std: 0.00414, params: {'reg_alpha': 0.1},\n",
       "  mean: -0.57786, std: 0.00474, params: {'reg_alpha': 1},\n",
       "  mean: -0.57750, std: 0.00461, params: {'reg_alpha': 2}],\n",
       " {'reg_alpha': 0.1},\n",
       " -0.5773876687260544)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Base estimator for the reg_alpha search, using the tree parameters fixed in the previous steps.\n",
    "xgb4 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=232,  # best n_estimators from the first tuning round\n",
    "        max_depth=6,\n",
    "        min_child_weight=7,\n",
    "        gamma=0,\n",
    "        subsample=0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "# 3-fold cross-validation scored by negated log loss (higher is better).\n",
    "gsearch4 = GridSearchCV(xgb4, param_grid = param_test4, scoring='neg_log_loss',n_jobs=-1, cv=3)\n",
    "gsearch4.fit(X_train , y_train)\n",
    "\n",
    "# grid_scores_ was removed in scikit-learn 0.20; cv_results_ holds the same per-candidate scores.\n",
    "display(pd.DataFrame(gsearch4.cv_results_)[['params', 'mean_test_score', 'std_test_score']])\n",
    "gsearch4.best_params_, gsearch4.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "当reg_alpha取缺省值（0）时，logloss=0.5777316363039906，原记录认为比此时最佳的参数（0.1）还要低，因此取最佳的reg_alpha=0。（注意：上面保存的输出中 reg_alpha=0.1 的 logloss 为 0.57739，低于 0.57773，该结论需重新核对。）"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 五、调整正则化参数：reg_lambda\n",
    "\n",
    "行列采样参数略"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "前面已经调好的参数： n_estimators：232 max_depth：6 min_child_weight：7 reg_alpha：0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'reg_lambda': [0.1, 1, 2]}"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#reg_alpha = [ 0.1, 1, 2]    #default = 0, 测试0.1,1，1.5，2\n",
    "reg_lambda = [0.1, 1, 2]      #default = 1，测试0.1， 0.5， 1，2\n",
    "\n",
    "param_test5 = dict(reg_lambda=reg_lambda)\n",
    "param_test5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.57763, std: 0.00417, params: {'reg_lambda': 0.1},\n",
       "  mean: -0.57798, std: 0.00462, params: {'reg_lambda': 1},\n",
       "  mean: -0.57802, std: 0.00491, params: {'reg_lambda': 2}],\n",
       " {'reg_lambda': 0.1},\n",
       " -0.5776311386486628)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Base estimator for the reg_lambda search, with reg_alpha fixed at 0.\n",
    "xgb5 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=232,  # best n_estimators from the first tuning round\n",
    "        max_depth=6,\n",
    "        min_child_weight=7,\n",
    "        gamma=0,\n",
    "        subsample=0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        reg_alpha = 0,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "# 3-fold cross-validation scored by negated log loss (higher is better).\n",
    "gsearch5 = GridSearchCV(xgb5, param_grid = param_test5, scoring='neg_log_loss',n_jobs=-1, cv=3)\n",
    "gsearch5.fit(X_train , y_train)\n",
    "\n",
    "# grid_scores_ was removed in scikit-learn 0.20; cv_results_ holds the same per-candidate scores.\n",
    "display(pd.DataFrame(gsearch5.cv_results_)[['params', 'mean_test_score', 'std_test_score']])\n",
    "gsearch5.best_params_, gsearch5.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "原记录：最佳结果在reg_lambda=2（-0.57761588435180056），所以继续测试更大的reg_lambda。（注意：上面保存的输出中最佳为 reg_lambda=0.1，-0.57763；reg_lambda=2 为 -0.57802，与该记录不一致，需重新核对。）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'reg_lambda': [3, 4]}"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#reg_alpha = [ 0.1, 1, 2]    #default = 0, 测试0.1,1，1.5，2\n",
    "reg_lambda = [3, 4]      #default = 1，测试0.1， 0.5， 1，2\n",
    "\n",
    "param_test5_2 = dict(reg_lambda=reg_lambda)\n",
    "param_test5_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n",
      "  DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "([mean: -0.57763, std: 0.00508, params: {'reg_lambda': 3},\n",
       "  mean: -0.57780, std: 0.00428, params: {'reg_lambda': 4}],\n",
       " {'reg_lambda': 3},\n",
       " -0.5776262144867814)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Base estimator for the extended reg_lambda search, with reg_alpha fixed at 0.\n",
    "xgb5_2 = XGBClassifier(\n",
    "        learning_rate =0.1,\n",
    "        n_estimators=232,  # best n_estimators from the first tuning round\n",
    "        max_depth=6,\n",
    "        min_child_weight=7,\n",
    "        gamma=0,\n",
    "        subsample=0.5,\n",
    "        colsample_bytree=0.8,\n",
    "        colsample_bylevel = 0.7,\n",
    "        reg_alpha = 0,\n",
    "        objective= 'multi:softprob',\n",
    "        seed=3)\n",
    "\n",
    "# 3-fold cross-validation scored by negated log loss (higher is better).\n",
    "gsearch5_2 = GridSearchCV(xgb5_2, param_grid = param_test5_2, scoring='neg_log_loss',n_jobs=-1, cv=3)\n",
    "gsearch5_2.fit(X_train , y_train)\n",
    "\n",
    "# grid_scores_ was removed in scikit-learn 0.20; cv_results_ holds the same per-candidate scores.\n",
    "display(pd.DataFrame(gsearch5_2.cv_results_)[['params', 'mean_test_score', 'std_test_score']])\n",
    "gsearch5_2.best_params_, gsearch5_2.best_score_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "最佳reg_lambda=3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 六、再次直接调用xgboost内嵌的cv寻找最佳的参数n_estimators"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "此时将学习率改小为0.02，重新调整弱分类器数目（n_estimators）。\n",
    "\n",
    "前面已经调好的参数： n_estimators：232 max_depth：6 min_child_weight：7 reg_alpha：0 reg_lambda：3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# xgboost's built-in cross-validation (xgb.cv) can cross-validate the continuous n_estimators\n",
    "# parameter quickly, whereas GridSearchCV only evaluates a finite list of candidates.\n",
    "def modelfit(alg, X_train, y_train, cv_folds=3, early_stopping_rounds=10):\n",
    "    \"\"\"Choose n_estimators with xgb.cv, then refit `alg` on the training data.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    alg : estimator exposing get_xgb_params / get_params / set_params / fit / predict_proba\n",
    "        (an XGBClassifier in this notebook). It is modified in place: n_estimators is\n",
    "        replaced by the value found by cross-validation and the model is fitted.\n",
    "    X_train, y_train : training features and labels.\n",
    "    cv_folds : int or splitter\n",
    "        An int is used as the number of stratified folds; anything else is handed\n",
    "        to xgb.cv unchanged as its `folds` argument.\n",
    "    early_stopping_rounds : int\n",
    "        Forwarded to xgb.cv as its early_stopping_rounds argument.\n",
    "\n",
    "    Side effects: writes the CV log to '6_nestimators.csv' and prints the\n",
    "    log loss measured on the training data. Returns None.\n",
    "    \"\"\"\n",
    "    xgb_param = alg.get_xgb_params()\n",
    "    xgb_param['num_class'] = 3  # three interest levels: high / medium / low\n",
    "\n",
    "    # Call the native xgboost API directly rather than the sklearn wrapper class\n",
    "    xgtrain = xgb.DMatrix(X_train, label = y_train)\n",
    "\n",
    "    # xgb.cv takes a fold count through `nfold`; `folds` is meant for splitter objects or\n",
    "    # index lists. Passing an int as `folds` only worked by accident in old xgboost\n",
    "    # releases (it fell back to a stratified split) and raises in current ones.\n",
    "    if isinstance(cv_folds, int):\n",
    "        fold_args = dict(nfold=cv_folds, stratified=True)\n",
    "    else:\n",
    "        fold_args = dict(folds=cv_folds)\n",
    "\n",
    "    cvresult = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'],\n",
    "             metrics='mlogloss', early_stopping_rounds=early_stopping_rounds, **fold_args)\n",
    "\n",
    "    cvresult.to_csv('6_nestimators.csv', index_label = 'n_estimators')\n",
    "\n",
    "    # Best n_estimators: the number of rows in the CV log\n",
    "    n_estimators = cvresult.shape[0]\n",
    "\n",
    "    # Train the model with the n_estimators found by cross-validation.\n",
    "    # eval_metric is not passed to fit(): without an eval_set it has no effect,\n",
    "    # and recent xgboost releases no longer accept it there.\n",
    "    alg.set_params(n_estimators = n_estimators)\n",
    "    alg.fit(X_train, y_train)\n",
    "\n",
    "    # Predict on the training set\n",
    "    train_predprob = alg.predict_proba(X_train)\n",
    "    logloss = log_loss(y_train, train_predprob)\n",
    "\n",
    "    # Print model report\n",
    "    print ('logloss of train is:', logloss)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logloss of train is: 0.47994618420999946\n"
     ]
    }
   ],
   "source": [
    "# Second n_estimators search: parameters tuned in the previous steps, learning rate lowered to 0.02.\n",
    "xgb6 = XGBClassifier(\n",
    "    objective='multi:softprob',\n",
    "    learning_rate=0.02,\n",
    "    n_estimators=2000,  # a large value is fine; cv returns a suitable n_estimators automatically\n",
    "    max_depth=6,\n",
    "    min_child_weight=7,\n",
    "    gamma=0,\n",
    "    subsample=0.5,\n",
    "    colsample_bytree=0.8,\n",
    "    colsample_bylevel=0.7,\n",
    "    reg_alpha=0,\n",
    "    reg_lambda=3,\n",
    "    seed=3,\n",
    ")\n",
    "\n",
    "modelfit(xgb6, X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'base_score': 0.5,\n",
       " 'booster': 'gbtree',\n",
       " 'colsample_bylevel': 0.7,\n",
       " 'colsample_bytree': 0.8,\n",
       " 'gamma': 0,\n",
       " 'learning_rate': 0.02,\n",
       " 'max_delta_step': 0,\n",
       " 'max_depth': 6,\n",
       " 'min_child_weight': 7,\n",
       " 'missing': None,\n",
       " 'n_estimators': 1311,\n",
       " 'nthread': 1,\n",
       " 'objective': 'multi:softprob',\n",
       " 'reg_alpha': 0,\n",
       " 'reg_lambda': 3,\n",
       " 'scale_pos_weight': 1,\n",
       " 'seed': 3,\n",
       " 'silent': 1,\n",
       " 'subsample': 0.5}"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb6.get_xgb_params()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: from_csv is deprecated. Please use read_csv(...) instead. Note that some of the default arguments are different, so please refer to the documentation for from_csv when changing your function calls\n",
      "  \"\"\"Entry point for launching an IPython kernel.\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEXCAYAAABCjVgAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3XmcXFWZ//HPU9Vrekkn6c7aCQkQloDIElYR4oaACm6D4DLiqKgjOug4Dgz+HH/8hnEdxwWUQQdwXEDcEXFQAcUFkAQIeyBAls7WCUkn3Ul6ref3xznVXelUJ52kq2911/f9et1X3XvuqVtP3equp+45955r7o6IiAhAKukARESkeCgpiIhIPyUFERHpp6QgIiL9lBRERKSfkoKIiPRTUhDJYWb/YmbfTjoOkaQoKYwxZlZrZivM7O05ZXVmtsrM3ppTttDMbjezLWbWZmZPmtnVZjYprr/YzPrMrCNOz5vZhwoc+yIzaynka+yLfPG4+7+7+/sK9HorzOzVhdh2IYzW5zXW9st4p6Qwxrh7B3AJ8FUza4rFXwAWu/uPAczsNOD3wJ+BI9y9ATgb6AVemrO5+9y91t1rgbcCXzCz40bnnci+MLOypGOQEuHumsbgBNwE3AwsAl4EZuSs+xPw9b08/2LgT4PK/gq8PWf5POAJoI2QZI7MWXdkLGuLdc7LWXcu8CTQDqwBPgHUADuBDNARp5lDvK9rgV/F5z8AHDKM/XEE8FtgM7AMuGB/4gE+A3wvPm8u4MB7gNXAFuCDwInAo/G9X5PzOocAd8fPYxPwfaAhrvtufK2d8bU+OYx9vAL45/haXUBZXF4T38sy4FV59sUpwHognVP2JuDROH8SsBjYBmwAvjzEPl0EtAyxbiLwP8BGYCXwKSAV16WB/4j74AXg0rgfy4bY1grg1UOsez+wPH6ut2X/ZgAD/hNoBbbGfXT0UJ930v+vY2lKPABN+/nBwSRgXfzHe09OeQ3QByzay/MvJicpxC+6NuCwuHwYsB14DVAOfDL+c1bE5eXAv8TlV8Z/wMPjc9cBL8+J8/g4P+SXTE4cN8UvgJPil+D3gVv28pwawpf2e+Jzjo/75ah9jYf8SeE6oAo4C+gEfg5MBWbFL6UzY/1D4/6qBJqAe4Gv5Gx7ly+/Pe3jnPqPALOBauDw+D5n5sSXN2ECzwGvyVn+EXB5nL8PeFecrwVOGWIbQ35ehITwC6AuxvEM8N647oOEL+XmuL9/x34khfh3tSl+npXA14F747rXAkuABkKCOJL4w2ioz1vT8CY1H41R7r6F8AtzAvDTnFWTCM2C67MFZvaF2K+w3cw+lVP3lFjeQThK+C7wbFz3NuBX7v5bd+8BvkT4YjqN8Eu0Fvicu3e7+93A7cBF8bk9wAIzq3f3Le7+0D6+vZ+6+1/dvZeQFI7dS/3XAyvc/UZ3742v9xNCk9hIxPP/3L3T3X9D+BK/2d1b3X0N8EfgOAB3Xx73V5e7bwS+DJy5h+3uaR9nfc3dV7v7TkKyr4zvpdzdV7j7c0Ns+2bi52FmdYRfzzfn7I9DzazR3Tvc/f592Rlmlo6xX+Hu7e6+gnBk8K5Y5QLgq+7eEv9OP7cv28/xDuAGd3/I3buAK4BTzWxufA91hCNEc/en3H1dzvs7kM+7pCkpjFFm9k7CL7TfAZ/PWbWF0EwxI1vg7p/00K/wM8Iv6az73b3BQ5/CdOAo4N/jupmEZoHsNjKEX6mz4rrVsSxrZVwH8BbCl9BKM/uDmZ26j29vfc78DkIC2pODgJNjgmszszbCF8r0EYpnQ878zjzLtQBmNtXMbjGzNWa2Dfge0LiH7e5pH2etzlm/HLiMcDTTGl9r5hDb/gHwZjOrBN4MPOTu2dd6L+Eo5Wkze9DMXr+HGPNpJBwhrswpy/38Z+bGPWh+XwzePx2EprlZ8YfINYSmxg1mdr2Z1ceqB/p5lzQl
hTHIzKYS2lPfD3wAuMDMzgBw9+2Edvg378s23X0D4df1G2LRWsKXbfY1jdCMsSaum21muX8/c+I63P1Bdz+f0MTyc+DW7MvsS0z7YDXwh5jgslOtu39olOP5bNzmMe5eD7yT0LSRNfj19rSP8z7H3X/g7qfH5zm7/iDIrfck4Qv1HODthCSRXfesu19E2B+fB35sZjXDf5tsIvwaPyinrP/zJzTfNOesm70P2841eP/UAFMY+Dv7mrufQPgxcxjwT7F8qM9bhkFJYWy6Bvi5u98TD5k/CXwr/iokLv+dmV0eEwhm1gzMG2qDZjaF0Bn5RCy6FXidmb3KzMqBfyR0dv6FkHS2A580s3IzW0RIJreYWYWZvcPMJsYmkW2EZg8Iv7CnmNnEEdoPWbcDh5nZu2I85WZ2opkdOcrx1BE6kdvMbBbxSyrHBuDgnOU97ePdmNnhZvbK+Dl3Eo5S+vLVjX4AfBQ4g9CnkN3OO82sKR6ZtMXiIbdjZlW5E+FI9Fbg6ng69EHAxwlHRtn39Q9mNsvMGgid43tTPuh1ymL87zGzY+N7/nfgAXdfET/fk+N+2x73R99ePm8ZjqQ7NTTt2wS8kfALqmFQ+V3A1TnLJwN3EP7p24DHgauBKXH9xYR/luyZN62ENuepOdt4E6HDcCvwB2LHbVx3VCzbGuu8KZZXAP9LaMbaBjwInJ7zvBsITQBtDH320b/lLC9iL53Tsd7hhDOWNsbt303oi9ineMjf0VyWU7+FnE58whfhp3L2yZK4Px8hfMm35NQ9H1gVX+sTw9jHK9i1Y/oYQt9PO6Ez/vZ8+zCn/hzCF/ivBpV/L37eHYQfAW8c4vmL4vsfPB1K6Lv6Xtzfq4FPM3D2URnhSPZFwtlHHyMcWdgQr7Miz2v8W1z3QUKnefb9NsfyVxHOOOpg4Eyv2r193pr2PlncwSIiBWFm5wDXuftBe60siVPzkYiMKDOrNrNzzawsNqP9K+EkBxkDdKQgY4KZvRz4db51Hs6ekiJhZhMITWFHEPo9fgX8g7tvSzQwGRYlBRER6afmIxER6TfmBtlqbGz0uXPnJh2GiMiYsmTJkk3u3rS3emMuKcydO5fFixcnHYaIyJhiZiv3XkvNRyIikqNgScHMbjCzVjN7fIj1R5jZfWbWZWafKFQcIiIyfIU8UriJcGOXoWwmXIL/pQLGICIi+6BgScHd7yV88Q+1vtXdHyRc/i4iIkVgTPQpmNklZrbYzBZv3Lgx6XBERMatMZEU3P16d1/o7gubmvZ6RpWIiOynMZEURERkdJRMUmhZsYw//fQbbNs6ZDeHiEjJK9jFa2Z2M2E89kYzayGMlFgO4O7Xmdl0YDFQD2TM7DJgQaEGzdr41F84/dEreP6wE6ifeHIhXkJEZMwrWFLwcLu/Pa1fz6637CuoitrJAOxs15GCiMhQSqb5qLIuJIWu9hcTjkREpHiVTFKYMLERgN6OLQlHIiJSvEomKdROCkmhb4eSgojIUEomKdTVTybjBjvbkg5FRKRolUxSsFSaDpuAdSkpiIgMpWSSAkCH1ZLq1m1iRUSGUlJJYUeqlgolBRGRIZVUUugsq6eyV0lBRGQoJZUUesrrqe5rTzoMEZGiVVJJobeynhrvSDoMEZGiVVJJwSsnUu/bcfekQxERKUollRSoaqDSeujoUBOSiEg+JZUUUhMmAdDetinhSEREilNJJYWymjAo3o6tSgoiIvmUVFLQ8NkiIntWUkmhKg6f3b1Nw2eLiORTUklhQkMYKbVnu0ZKFRHJp6SSQu1EDZ8tIrInJZUUaupD85F3bk04EhGR4lRSScHSZXR4NbbukaRDEREpSiWVFAC2pSdSVl2fdBgiIkWp5JLC9nQDld3qUxARyadgScHMbjCzVjN7fIj1ZmZfM7PlZvaomR1fqFhydZY3UNOru6+JiORTyCOFm4Cz97D+HGB+nC4BvlnAWPr1VE2mNqOOZhGRfAqWFNz9XmBPlw6fD/yPB/cD
DWY2o1DxZGWqJtPg7fRlNFKqiMhgSfYpzAJW5yy3xLLdmNklZrbYzBZv3LjxgF7UaqZQbd1saVO/gojIYEkmBctTlvfnu7tf7+4L3X1hU1PTAb1oujY8f9vmDQe0HRGR8SjJpNACzM5ZbgbWFvpFK+tDUti+eX2hX0pEZMxJMincBvxtPAvpFGCru68r9ItWN0wDYOfWA2uGEhEZj8oKtWEzuxlYBDSaWQvwr0A5gLtfB9wBnAssB3YA7ylULLnqJoek0N2upCAiMljBkoK7X7SX9Q58uFCvP5T6xnCCU0ZJQURkNyV3RXP5hAZ6PQWr7k86FBGRolNySQEztqXqSddOSToSEZGiU3pJAdiUqWPn1takwxARKTolmRQy1ZOZlt6edBgiIkWnJJNCd8Vkavs0KJ6IyGAlmRT6qicz0beS0fhHIiK7KMmkQE0jDWynbfvOpCMRESkqJZkUyupnkDJnc2tL0qGIiBSVkkwKVZPDBWzbNq5JOBIRkeJSkkmhZnIYoXvH5oIPtSQiMqaUZFKYNK0ZgJ6tSgoiIrlKMilUN4TmI2/X8NkiIrlKMilQXkU7NaR2aFA8EZFcpZkUgK1lU6jcqaQgIpKrZJPC9vIp1Pa8mHQYIiJFpWSTQndVIxP7NicdhohIUSnZpJCpmcoU2tje1Zt0KCIiRaNkk8L9rWXUWBcbX9TRgohIVskmhUUnHA1AW+vqhCMRESkeJZsUJsSrmm+6U7flFBHJKtmk0BCvan7DwemEIxERKR4lmxRqG2cD0LtVg+KJiGQVNCmY2dlmtszMlpvZ5XnWH2Rmd5nZo2b2ezNrLmQ8u7x29SQ6qSDVrvGPRESyCpYUzCwNXAucAywALjKzBYOqfQn4H3c/BrgK+Gyh4skTIFvKplK9U0lBRCSrkEcKJwHL3f15d+8GbgHOH1RnAXBXnL8nz/qC2l45lfoeDXUhIpJVyKQwC8g937MlluVaCrwlzr8JqDOzKQWMaRfdNTNozGykq7dvtF5SRKSoFTIpWJ4yH7T8CeBMM3sYOBNYA+x2ibGZXWJmi81s8caNI/jLvn4WU2ljw5btI7dNEZExrJBJoQWYnbPcDKzNreDua939ze5+HHBlLNs6eEPufr27L3T3hU1NTSMWYPnk2ZRZho0bdAGbiAgUNik8CMw3s3lmVgFcCNyWW8HMGs0sG8MVwA0FjGc3tU1zAGjfsGI0X1ZEpGgVLCm4ey9wKXAn8BRwq7s/YWZXmdl5sdoiYJmZPQNMA64uVDz5TJpxMACdL+pIQUQEoKyQG3f3O4A7BpV9Omf+x8CPCxnDnlRNDpdFZNpakgpBRKSolOwVzQBUT6KTStIdulZBRARKPSmY0VbepAvYRESi0k4KwI6q6Uzs3oD74LNlRURKT8knhd76OcyklU0d3UmHIiKSuJJPCn/eNIEm20bLhk1JhyIikriSTwrnnHEqAJvXPJtwJCIiySv5pDBp1qEA3PmnBxKOREQkeSWfFCobwwVsp07R+EciIiWfFKhpotMqKW9flXQkIiKJU1Iwo61iBnU7dVtOERElBWDnhGaaejfQ2aP7KohIaVNSALZt2cBsa6Vls/oVRKS0KSkAjSe9jTrbyao1akISkdKmpABMbj4MgM0tTycciYhIspQUgOoZRwLw6CNLEo5ERCRZe00KZnaImVXG+UVm9lEzayh8aKNo0lz6SHF0VWvSkYiIJGo4Rwo/AfrM7FDgv4F5wA8KGtVoK6ugrWImE3es1GipIlLShpMUMvHWmm8CvuLuHwNmFDas0besdzpzMms1WqqIlLThJIUeM7sIeDdweywrL1xIyZhz2EuYa+t5rnVb0qGIiCRmOEnhPcCpwNXu/oKZzQO+V9iwRl/NzCOptm7WrX4u6VBERBKz16Tg7k+6+0fd/WYzmwTUufvnRiG2UTWxOZyBdPef/pxwJCIiyRnO2Ue/N7N6M5sMLAVuNLMvFz600ZVqnA/A9N6WhCMREUnOcJqPJrr7
NuDNwI3ufgLw6sKGlYC66XSmajiENToDSURK1nCSQpmZzQAuYKCjeVjM7GwzW2Zmy83s8jzr55jZPWb2sJk9ambn7sv2R5QZ2+oPZW5mFRu2dSUWhohIkoaTFK4C7gSec/cHzexgYK/3rjSzNHAtcA6wALjIzBYMqvYp4FZ3Pw64EPjGvgQ/0hbvmM7htpqn1m1NMgwRkcQMp6P5R+5+jLt/KC4/7+5vGca2TwKWx/rdwC3A+YM3D9TH+YnA2uGHPvJeccYiJlkHX/35H5MMQ0QkMcPpaG42s5+ZWauZbTCzn5hZ8zC2PQtYnbPcEstyfQZ4p5m1AHcAHxkihkvMbLGZLd64ceMwXnr/VD/2fQDOatxcsNcQESlmw2k+uhG4DZhJ+FL/ZSzbG8tTNrgH9yLgJndvBs4Fvmtmu8Xk7te7+0J3X9jU1DSMl95Pf/sLAFIbnyrca4iIFLHhJIUmd7/R3XvjdBMwnG/mFmB2znIzuzcPvRe4FcDd7wOqgMZhbLswahrZXj6Zxh3PsbNbd2ETkdIznKSwyczeaWbpOL0TeHEYz3sQmG9m88ysgtCRfNugOquAVwGY2ZGEpFC49qFhWOazmW+reWKtOptFpPQMJyn8HeF01PXAOuCthKEv9igOoncp4cylpwhnGT1hZleZ2Xmx2j8C7zezpcDNwMWe8EUCR7z0FA631fzTDx9KMgwRkUSU7a2Cu68CzsstM7PLgK8M47l3EDqQc8s+nTP/JPCy4QY7GiasuAesh6ldLyQdiojIqNvfO699fESjKCYXhVtFHNKz10sxRETGnf1NCvnOLBofJh/Cdqo5wp+nbYfurSAipWV/k8L4HRwolaJ32jEck3qepS3qbBaR0jJkUjCzdjPblmdqJ1yzMG5Vzz2RI20VDz2/IelQRERG1ZAdze5eN5qBFJOK2cfDAz2sX/4wcHTS4YiIjJr9bT4a32YeB0D5hqV09eoiNhEpHUoK+UyaRyflHO3P8pj6FUSkhCgp5GNGqmoiJ6aWcdkPH0k6GhGRUaOkMISKqloOSa2jYmeio26IiIyq4Qydne8spNVxOO2DRyPIRFRNBOCInifo7FG/goiUhuEcKXwZ+CfCsNnNwCeAbxFumnND4UJL2Pvvooc0J9rT/PUF3V9BRErDcJLC2e7+X+7e7u7b3P164Fx3/yEwqcDxJSddTqqyjpNTT3PvM2pCEpHSMJykkDGzC8wsFacLctaN3yubgfSpf88RqVUseUaD44lIaRhOUngH8C6gNU7vItxCs5owNPb4ddBppHAaNj3MmradSUcjIlJwe00K7v68u7/B3Rvj9AZ3X+7uO939T6MRZGKaF5JJV3F66nHefv39SUcjIlJwwzn7qDmeadRqZhvM7Cdm1jwawSWuvJrU3JdxVsWjTJtYlXQ0IiIFN5zmoxsJt9GcSTgD6ZexrDRsfp7ZmTWsXfE0G9u7ko5GRKSghpMUmtz9RnfvjdNNQFOB4yoe7/gxAItSS7ngur8kHIyISGENJylsMrN3mlk6Tu8EXix0YEVjyiH4pLm8Mr2UF7frpjsiMr4NJyn8HXABsB5YB7wVeE8hgyoqZtihr+HU1BN0de7QWUgiMq4N5+yjVe5+nrs3uftUd38j8OZRiK14HHEu1XRxZmopb7vuvqSjEREpmP0dEO/jIxpFsZt7BlRP5nXpv7Jm604ymXF9zZ6IlLD9TQo2rEpmZ5vZMjNbbmaX51n/n2b2SJyeMbO2/YynsNJlcOTreU16CRXezQMaC0lExqn9TQp7/alsZmngWuAcYAFwkZkt2GUj7h9z92Pd/Vjg68BP9zOewmtZwgQ6eUX6US79wUNJRyMiUhBDJoUhhszeZmbthGsW9uYkYHm8IrqbMKrq+XuofxFw8z5FP5o+cC9UT+bc9AO8uL2b1vbOpCMSERlxQyYFd69z9/o8U527lw1j27OA1TnLLbFsN2Z2EDAPuHuI9ZeY2WIzW7xxY0IjlqbLIF3O69P3
M4FOTv9c3lBFRMa0Qt55LV+/w1DNThcCP3b3vHezcffr3X2huy9sakrwurkLvkvK+/hw2S/o7nN2duvmOyIyvhQyKbQAs3OWm4G1Q9S9kGJuOsqafRJMmc+ZVcsBOPsr9yYckIjIyCpkUngQmG9m88ysgvDFf9vgSmZ2OOFmPcV/AYAZ9PVwdO8THJJax6rNO9jR3Zt0VCIiI6ZgScHdewn3W7gTeAq41d2fMLOrzOy8nKoXAbe4+9g4+f+9dwLwjYprcOCs/9TRgoiMH8PpMN5v7n4HcMegsk8PWv5MIWMYcXXT4ag3cfjyu6nv7qRlC7Tt6KZhQkXSkYmIHLBCNh+NX6d+BLq28ttFqwA48erfJRyQiMjIUFLYH80nQGU90x74LFXpDD19zpKVW5KOSkTkgCkp7K/6WdDXxSN/E26885Zv/oWevkzCQYmIHBglhf31oT9DWRVVv/oIhzVNAGDRF+9JOCgRkQOjpLC/Umk4/1ro2cFvKv+ZspSxpq1T1y6IyJimpHAgjnozlE+AtpUs/dQrAHh6fbvGRRKRMUtJ4UCkUtAwF3o7qXn0Ju687AwATrr6Ljp7NASGiIw9SgoH6u//AlUNcOeVHF6zg/lTawE4+l/v1M14RGTMUVI4UGbwvrvA++A/j+K3Hz+TOZMn0JtxDv8/v1ZiEJExRUlhJDQeCvXNkOmBF/7IH/5pERVpo6cvJIaxMoKHiIiSwki59K+AwXfegHW1s+zfzulPDId96td09+oaBhEpfkoKI6WiBt77G8DhCwdjZrslhhc7upKOUkRkj5QURtLskyBdEZqRlt6CmfHM1edySFMNACf82+94tKUt4SBFRIampDDS/mUtWBp+9gFY8xAAd/3jIm679GUAnHfNn3nZ5+6mV0NiiEgRUlIYaely+MQzgMG3XgkdrQAc09zA0k+fFa983smhV/6ax9dsTTZWEZFBlBQKoaYRPnAv4PCl+dDbDcDECeUs//dzuebtxwHw+q//iflX3sHm7d0JBisiMkBJoVBmHAONh4f5Lx8JfQO37Xz9MTNZ+umzKI+d0Mf/v99y2Kd+zdadPQkFKyIS2Fg7h37hwoW+ePHipMMYvq+8FNpWQKocPrUhDKSX49kN7bwm55aeU+sq+d77TuawaXWjHKiIjGdmtsTdF+61npLCKPjKMdC2MiSGK9eFfodBHl+zlTd+48/09oXPI50yrn378bz6yKmUpXVAJyIHRkmh2PQnhjK4fFW4riGPzdu7Of+aP7F6y87+sun1Vfz3xQtZMKMeMxutiEVkHFFSKEZfOwE2Lw/zlz0ODbOHrNrbl+Gup1v5wHeX7FI+Y2IVX7/oOI6bM4l0SglCRIZHSaFYPXMn/OCCMH/xHTD3ZXt9ypbt3dzx+Dqu/Nnju5SXpYwrX3ckJ82bzBHT65UkRGRIRZEUzOxs4KtAGvi2u38uT50LgM8ADix197fvaZtjPikAbHwGrj0xzJ/7JTjxfWG01WHY1tnDH5Zt5GM/fITeQSOwNlSX88FFh3DSvMkcPXMiFWXqixCRIPGkYGZp4BngNUAL8CBwkbs/mVNnPnAr8Ep332JmU929dU/bHRdJAaBzK3zhYMj0hn6Gjz8NtU37vJm1bTt5cMVmPvGjpfT07f5Zvvf0eRw6tZb5U2s5dGotDRMqRiJ6ERljiiEpnAp8xt1fG5evAHD3z+bU+QLwjLt/e7jbHTdJASCTgQe+CXf+S1h+87fhJW8d9lFDPps6uli8YjMPvLCZG/+8Im+dUw+eEhLFtJAoDp1aS1NtpTqxRcaxYkgKbwXOdvf3xeV3ASe7+6U5dX5OOJp4GaGJ6TPu/r972u64SgpZrU/BN08Dz4Rxkz7813CPhhGQyThr2nayvLWDZ1vb+eKdy/IeUQDUVpbx+mNm9CeKeY01zJhYrWYokXGgGJLC3wCvHZQUTnL3j+TUuR3oAS4AmoE/Ake7e9ugbV0CXAIwZ86cE1auXFmQmBOV6YPFN8Ad
nwjLp/w9nPlJqJ5UkJdzd1rbu3h2QwfLW9v5xu+fo7V96KG9T5w7iVkN1TRPmsC0iVU01VbSVFfJ1LrwWFWeHvK5IpK8YkgKw2k+ug64391vist3AZe7+4NDbXdcHink6miF606Hjg1h+ayr4YSLobJ21ELYvL2bZze0s2rzDlq27KRly05+/nALQxxg9KsqT/HS5gam1g8kjeyUTR6TJ1SQ0llSIqOuGJJCGaFp6FXAGkJH89vd/YmcOmcTOp/fbWaNwMPAse7+4lDbHfdJIWv94/BfZ4R7PwOc8Uk4+QNhsL0E9fZl2Ly9m9b2LjZ2dLFxW3xs7+K2pWuHPbjfUTPraaqrpKG6nPrqcuqryqmvLmNi//yuZbWVZbqyW+QAJJ4UYhDnAl8h9Bfc4O5Xm9lVwGJ3v81Cz+Z/AGcDfcDV7n7LnrZZMkkha/WDcOPZ4SwlgJMugVMvhUkHJRvXMGzv6mVTR1dIIIOnji5+v6yVzH78+R0xvW4gcVSX5SSRmFRyEkptZRkTKsqoqUxTXZ5WZ7qUrKJICoVQckkha+MzoTM6E0dSfckF8LJ/gOlHJxvXCHF3Orp62dbZy7adPWHq7GVr/3wP23b2sq2zh607e/jtkxsO+DVTBsfPmURVeZqq8lR8DPPV/fMDZVVlaaorBuarKtLhsTwVystC3cqylJrIpOgoKYxXW9fA/d+A+64Jy5X18Lr/gCPPg/KqZGNLUF/G6egcSBohifSwvauPHd29bO/uY0d3H9u7enMew3xnTx8PrRqd26SeOHcSlTGRVJSlqEinKE+nKEunqEgb5ekU5WWhrH85PVC3omxguTxtu5WXp42yVIqy+NyylFGWU16eNh0tlSglhfFu5xZY8h343b8OlJ3y4dAp3XRYYmGNJ+5OV2+Grp4Mnb197Ozuo7O3j86eTP98V08fO3tCWWd83NkTyjtz1nX19vVv67E1W+no6t17AAkpTxuTaypIm5FKGemUDcz3l0HKjJTlrg+j++5aZqQsf3nuttIWklU6vl6oS566YXup3eruHlt23UBd8tTN1hkU427bZfft5Sm3GJsRLjcqpgSspFAqMhlYcS8svhGe/Hkoq6iBMy+Ho960x0H3pHi5Oz19Tm8mQ3dvhu7ouNGoAAAQLklEQVS+8NjT5/ExQ1d87M59zDi9fRl6+5yeTHzsy9CbLc84mYzT505fBjLu9GXClJ3POP11Bup6Tt3wvMyg8ifWbKO9iJPdWFKWMuqqyrCYsCAknItPm8uHX7F/1zApKZSijlZ49Fb47f8JF8JBaF561adD81LdtGTjE9kPgxNTxkNzYd7ElaF/3j1/eX8yyz4/mwSHTIRhfcZz1mecPt81NnfHPQzilsnOu+OAeyyL6/CBOhkHJ8zDrs/JlocTMpwzD2vi7KNn7Nd+VFIodZufh8d/CvdcPZAgKmrh9MvgsHNg2lEHNJyGiIwtSgoyoPUpePp2uOezA9c9pCvhhHfD4efAQadDmQbKExnPlBQkv/b14Z4Ov/r4wLUPAAveCPNfA/POgIY5ycUnIgWhpCB7170DXrgXlt0BD31noLysMlwHMe8MmPtyqN+/NkwRKR5KCrJv3EMz0wv3woo/huamrCnzYd7LQ5KYc5o6rEXGICUFOTCZPlj/WEgQL9wLz/4mZ6XBSy+E2SeHqekISGlcIpFipqQgI6uvF9Y9Aqvug1X3w9O/Ipx8Fx3ySpi1EJoXhseaKYmFKiK7G25SKBuNYGQcSJeFL/zmhXDaR0Jz0+bnYfUDIUk89iN47u6B+mVVcOQbBhLF9JeEvgoRKWo6UpCR09URjiZaFsOaxfDUL3ddP+uEmCROhOYTYNI8XSshMkrUfCTFYeuakCBaFsOaJbDyzzkrDQ56GUxbAFMXhAvqph4JlXWJhSsyXqn5SIrDxFlhWnB+WO7rhY1PhSSx9mFofRL+ev2uz2mYA1Njgph2VEgYjfMhXT768YuUGCUFGV3pstC/MP0lwHtC
WSYDW1fBhieh9YlwauzjP4Fnfj3wvFQ5NB4WjyqODElj2gKYOFtNUCIjSElBkpdKwaS5YTri3FD21hugtws2PRuOJjY8ER4f+9Guz62sj0kip/lp6gKYMHm034XIuKCkIMWrrDLcWW7w3eU6t4ajiWyi2PAkLLlx1zp1M2KiWDBwVNF4eEnfiEhkOJQUZOypmghzTglTlju0rxtogso+/uWuXZ875dBwhXbjoaE5asr80F8xYYqaoURQUpDxwgzqZ4Zp/qsHyvt6w/UU2USx8Wl4cTk887/scvFd9aSYIA7bNWFMnqcObikpSgoyvqXLwu1Jmw4Ld6LLyvRB26qQIDY9C5ueCfPLfwePfG+gXqos9HU0HhaPMg6ByYfA5INDE5WG95BxRklBSlMqHY4CJs8LQ4bn6tyakyxiwtj0bEgYfd0D9cqq4zYOznmMCaN+lhKGjElKCiKDVU2MV1+fsGt5pg+2toTmqNzpxeXw7G+hr2ugbroCGg4KyWLSvF0fGw5Sh7cUrYImBTM7G/gqkAa+7e6fG7T+YuCLwJpYdI27f7uQMYnst1QaJh0UpkNeseu6TAba18Yk8Vx43LICtrwAK++D7vZd69fNDM1Sk+fF03HnDZyWW9OoTm9JTMGSgpmlgWuB1wAtwINmdpu7Pzmo6g/d/dJCxSEyKlIpmNgcpnln7LrOHXa8CJtfCEliy4o4vwKeuyckk1wVtQMJIjtljzQmztatU6WgCnmkcBKw3N2fBzCzW4DzgcFJQWR8Mwu//msaYfaJu6/v2Rk6vbOJIps4sh3fvZ0520pBfXM4WmmIRy0NcwamuhnhiEZkPxUyKcwCVucstwAn56n3FjM7A3gG+Ji7rx5cwcwuAS4BmDNH9w+Wcaa8GpoOD9NgmQx0bBhIFNmjjLaVIWF0rN+1fqosHK30JwolDdk3hUwK+RpFBw/J+kvgZnfvMrMPAt8BXrnbk9yvB66HMErqSAcqUrRSqXCP7PoZcNBpu6/v6Qyd320rw9FG7vTsHpLGxNm7J4yGOeE6DyWNklbIpNACzM5ZbgZ2aTx19xdzFr8FfL6A8YiMP+VV8WK7Q/Ov31PSGOpIo35W/qOMhtmhgzytkxbHs0J+ug8C881sHuHsoguBt+dWMLMZ7r4uLp4HPFXAeERKz0gnDUtB7fQwHHr9rIHO9fo4RHp9M9Q06RqNMaxgScHde83sUuBOwimpN7j7E2Z2FbDY3W8DPmpm5wG9wGbg4kLFIyJ5DCdpbFsTksaWlWF+6xrY1gLrHwvDheR2hEMY5rx+RkgU/ckiO80MSWRCoxJHkdKd10Rk/7nDjs0hSWxtgW1rBx63rYnT2l2vBIdwcV/djIGjjGyyqJ85cASiQQpHlO68JiKFZwY1U8I046X562QysGNTzlHG2phE4vzq+2HbOsj07Pq8dGVOkpiVJ3k0h/tmKHGMKCUFESmsVApqp4Zp5nH562QysH1jSBbb1saEsWYgkay8L1zkl+nd9XllVbseXWTn62dC3fTQMa4+jn2ipCAiyUuloG5amAaPOZWV6YOO1oEjjf6mqnjE8cIfwz01vG/QtsugdlporqqfER7rpu/+WNWgow6UFERkrEilB67ZYE+JY0NojmqP07a10L4+HGlsehZeuDeMhDtYWVVIDrXT8yeN7GNl3bhOHkoKIjJ+pNIDN1vak+4d4XTb9vUxeeQ+rocNj4dTcrs7dn9uec2gZDFEAqmYUJj3WGBKCiJSeiomxPtfHLznel3t0L4hT+KIj2uWhPnBp+UCVE7cc9LIriurLMx73E9KCiIiQ6msC9NQ13FAOC23c+sQRx3xceVfwvzgM6wAqifnTxZ1Of0ftVNH7bawSgoiIgfCDKobwjT1iKHrZTKwc8vQiaN9HbQ+FfpEBneWY+EsqlM+BC//eEHfjpKCiMhoSKUGrumYfvTQ9TJ9sH1T/qQxeV7Bw1RSEBEpJqn0wOm5Sbx8Iq8qIiJFSUlBRET6KSmIiEg/
JQUREemnpCAiIv2UFEREpJ+SgoiI9FNSEBGRfmPudpxmthFYuZ9PbwQ2jWA4o2msxj5W44axG/tYjRvGbuxjIe6D3L1pb5XGXFI4EGa2eDj3KC1GYzX2sRo3jN3Yx2rcMHZjH6tx56PmIxER6aekICIi/UotKVyfdAAHYKzGPlbjhrEb+1iNG8Zu7GM17t2UVJ+CiIjsWakdKYiIyB4oKYiISL+SSQpmdraZLTOz5WZ2edLx5DKz2WZ2j5k9ZWZPmNk/xPLJZvZbM3s2Pk6K5WZmX4vv5VEzOz7h+NNm9rCZ3R6X55nZAzHuH5pZRSyvjMvL4/q5CcfdYGY/NrOn474/dQzt84/Fv5XHzexmM6sqxv1uZjeYWauZPZ5Tts/72MzeHes/a2bvTjD2L8a/l0fN7Gdm1pCz7ooY+zIze21OedF+9+Tl7uN+AtLAc8DBQAWwFFiQdFw58c0Ajo/zdcAzwALgC8Dlsfxy4PNx/lzg14ABpwAPJBz/x4EfALfH5VuBC+P8dcCH4vzfA9fF+QuBHyYc93eA98X5CqBhLOxzYBbwAlCds78vLsb9DpwBHA88nlO2T/sYmAw8Hx8nxflJCcV+FlAW5z+fE/uC+L1SCcyL3zfpYv/uyfu+kw5gVN4knArcmbN8BXBF0nHtId5fAK8BlgEzYtkMYFmc/y/gopz6/fUSiLUZuAt4JXB7/IfelPOP07/vgTuBU+N8WaxnCcVdH79YbVD5WNjns4DV8UuyLO731xbrfgfmDvpi3ad9DFwE/FdO+S71RjP2QeveBHw/zu/ynZLd52Ptu8fdS6b5KPtPlNUSy4pOPLQ/DngAmObu6wDi49RYrZjez1eATwKZuDwFaHP33ricG1t/3HH91lg/CQcDG4EbY9PXt82shjGwz919DfAlYBWwjrAflzA29jvs+z4umn0/yN8Rjmxg7MU+pFJJCpanrOjOxTWzWuAnwGXuvm1PVfOUjfr7MbPXA63uviS3OE9VH8a60VZGaBr4prsfB2wnNGUMpWhij23w5xOaKWYCNcA5eaoW437fk6HiLLr4zexKoBf4frYoT7WijH1vSiUptACzc5abgbUJxZKXmZUTEsL33f2nsXiDmc2I62cArbG8WN7Py4DzzGwFcAuhCekrQIOZleWJrT/uuH4isHk0A87RArS4+wNx+ceEJFHs+xzg1cAL7r7R3XuAnwKnMTb2O+z7Pi6mfU/s6H498A6PbUKMkdiHo1SSwoPA/Hh2RgWhs+22hGPqZ2YG/DfwlLt/OWfVbUD2TIt3E/oasuV/G8/WOAXYmj0cH03ufoW7N7v7XMI+vdvd3wHcA7x1iLiz7+etsX4iv5rcfT2w2swOj0WvAp6kyPd5tAo4xcwmxL+dbOxFv9/zxDOcfXwncJaZTYpHSWfFslFnZmcD/wyc5+47clbdBlwYz/SaB8wH/kqRf/fklXSnxmhNhDMbniGcCXBl0vEMiu10wiHlo8AjcTqX0O57F/BsfJwc6xtwbXwvjwELi+A9LGLg7KODCf8Qy4EfAZWxvCouL4/rD0445mOBxXG//5xwZsuY2OfA/wWeBh4Hvks466Xo9jtwM6Hfo4fwq/m9+7OPCe33y+P0ngRjX07oI8j+n16XU//KGPsy4Jyc8qL97sk3aZgLERHpVyrNRyIiMgxKCiIi0k9JQURE+ikpiIhIPyUFERHpp6QgIiL9lBREhsHMjjWzc3OWzxupYZDN7DIzmzAS2xI5ULpOQWQYzOxiwsVUlxZg2yvitjftw3PS7t430rGI6EhBxhUzm2vhhjnfijeh+Y2ZVQ9R9xAz+18zW2JmfzSzI2L538Sb1yw1s3vj8ARXAW8zs0fM7G1mdrGZXRPr32Rm37Rwo6TnzezMeIOWp8zsppzX+6aZLY5x/d9Y9lHCoHb3mNk9sewiM3ssxvD5nOd3mNlVZvYAcKqZfc7Mnow3fPlSYfaolJykL6nWpGkkJ8L4973AsXH5VuCdQ9S9C5gf
508mjAkEYYiFWXG+IT5eDFyT89z+ZeAmwoCARhi9dBvwEsKPriU5sWSHc0gDvweOicsrgMY4P5MwtlETYSTXu4E3xnUOXJDdFmE4BcuNU5OmA510pCDj0Qvu/kicX0JIFLuIw5SfBvzIzB4h3LhlRlz9Z+AmM3s/4Qt8OH7p7k5IKBvc/TF3zwBP5Lz+BWb2EPAwcBThbl2DnQj83sMIqNmhmc+I6/oII+lCSDydwLfN7M3Ajt22JLIfyvZeRWTM6cqZ7wPyNR+lCDelOXbwCnf/oJmdDLwOeMTMdquzh9fMDHr9DFAWR878BHCiu2+JzUpVebaTb/z9rE6P/Qju3mtmJxFGSL0QuJQwdLnIAdGRgpQkDzcxesHM/gb6bxr/0jh/iLs/4O6fJty6cjbQTrh/9v6qJ9zIZ6uZTWPXm+LkbvsB4EwzazSzNOFWlH8YvLF4pDPR3e8ALiOM+CpywHSkIKXsHcA3zexTQDmhX2Ap8EUzm0/41X5XLFsFXB6bmj67ry/k7kvN7GFCc9LzhCaqrOuBX5vZOnd/hZldQbg3ggF3uPsvdt8idcAvzKwq1vvYvsYkko9OSRURkX5qPhIRkX5qPpJxz8yuJdxPOtdX3f3GJOIRKWZqPhIRkX5qPhIRkX5KCiIi0k9JQURE+ikpiIhIv/8PB1yrxmBTSz8AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x2e1b43a2860>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "cvresult = pd.DataFrame.from_csv('6_nestimators.csv')\n",
    "        \n",
    "# plot\n",
    "test_means = cvresult['test-mlogloss-mean']\n",
    "test_stds = cvresult['test-mlogloss-std'] \n",
    "        \n",
    "train_means = cvresult['train-mlogloss-mean']\n",
    "train_stds = cvresult['train-mlogloss-std'] \n",
    "\n",
    "x_axis = range(0, cvresult.shape[0])\n",
    "        \n",
    "pyplot.errorbar(x_axis, test_means, yerr=test_stds ,label='Test')\n",
    "pyplot.errorbar(x_axis, train_means, yerr=train_stds ,label='Train')\n",
    "pyplot.title(\"XGBoost n_estimators vs Log Loss\")\n",
    "pyplot.xlabel( 'n_estimators' )\n",
    "pyplot.ylabel( 'Log Loss' )\n",
    "pyplot.savefig( 'n_estimators6.png' )\n",
    "\n",
    "pyplot.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 保存模型，供测试使用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Serialize `xgb6` to disk so it can be reloaded later in the notebook.\n",
    "import pickle\n",
    "\n",
    "# The context manager flushes and closes the file before it is read back.\n",
    "with open(\"xgb_model.pkl\", 'wb') as model_file:\n",
    "    pickle.dump(xgb6, model_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "logloss of train is: 0.47994618420999946\n"
     ]
    }
   ],
   "source": [
    "# Reload the pickled model and report its log loss on the training data.\n",
    "import pickle\n",
    "\n",
    "# Bind to `xgb_model`, not `xgb`: `xgb` is the xgboost module alias from the\n",
    "# imports cell, and rebinding it would break any later `xgb.<function>` call.\n",
    "# NOTE(review): only unpickle files this notebook wrote itself.\n",
    "with open(\"xgb_model.pkl\", 'rb') as model_file:\n",
    "    xgb_model = pickle.load(model_file)\n",
    "\n",
    "train_predprob = xgb_model.predict_proba(X_train)\n",
    "logloss = log_loss(y_train, train_predprob)\n",
    "\n",
    "# Print model report:\n",
    "print('logloss of train is:', logloss)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 七、 利用训练好的模型，在测试集上进行测试"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 读取测试数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>listing_id</th>\n",
       "      <th>bathrooms</th>\n",
       "      <th>bedrooms</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "      <th>price</th>\n",
       "      <th>price_bathrooms</th>\n",
       "      <th>price_bedrooms</th>\n",
       "      <th>room_diff</th>\n",
       "      <th>room_num</th>\n",
       "      <th>...</th>\n",
       "      <th>virtual</th>\n",
       "      <th>walk</th>\n",
       "      <th>walls</th>\n",
       "      <th>war</th>\n",
       "      <th>washer</th>\n",
       "      <th>water</th>\n",
       "      <th>wheelchair</th>\n",
       "      <th>wifi</th>\n",
       "      <th>windows</th>\n",
       "      <th>work</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>7142618</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>40.7185</td>\n",
       "      <td>-73.9865</td>\n",
       "      <td>2950</td>\n",
       "      <td>1475.000000</td>\n",
       "      <td>1475.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>7210040</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>40.7278</td>\n",
       "      <td>-74.0000</td>\n",
       "      <td>2850</td>\n",
       "      <td>1425.000000</td>\n",
       "      <td>950.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7103890</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>40.7306</td>\n",
       "      <td>-73.9890</td>\n",
       "      <td>3758</td>\n",
       "      <td>1879.000000</td>\n",
       "      <td>1879.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>7143442</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "      <td>40.7109</td>\n",
       "      <td>-73.9571</td>\n",
       "      <td>3300</td>\n",
       "      <td>1650.000000</td>\n",
       "      <td>1100.000000</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6860601</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2</td>\n",
       "      <td>40.7650</td>\n",
       "      <td>-73.9845</td>\n",
       "      <td>4900</td>\n",
       "      <td>1633.333333</td>\n",
       "      <td>1633.333333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 225 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   listing_id  bathrooms  bedrooms  latitude  longitude  price  \\\n",
       "0     7142618        1.0         1   40.7185   -73.9865   2950   \n",
       "1     7210040        1.0         2   40.7278   -74.0000   2850   \n",
       "2     7103890        1.0         1   40.7306   -73.9890   3758   \n",
       "3     7143442        1.0         2   40.7109   -73.9571   3300   \n",
       "4     6860601        2.0         2   40.7650   -73.9845   4900   \n",
       "\n",
       "   price_bathrooms  price_bedrooms  room_diff  room_num  ...   virtual  walk  \\\n",
       "0      1475.000000     1475.000000        0.0       2.0  ...         0     0   \n",
       "1      1425.000000      950.000000       -1.0       3.0  ...         0     0   \n",
       "2      1879.000000     1879.000000        0.0       2.0  ...         0     0   \n",
       "3      1650.000000     1100.000000       -1.0       3.0  ...         0     0   \n",
       "4      1633.333333     1633.333333        0.0       4.0  ...         0     0   \n",
       "\n",
       "   walls  war  washer  water  wheelchair  wifi  windows  work  \n",
       "0      0    0       0      0           0     0        0     0  \n",
       "1      0    1       0      0           0     0        0     0  \n",
       "2      0    0       0      0           0     0        0     0  \n",
       "3      0    0       0      0           1     0        0     0  \n",
       "4      0    1       0      0           0     0        0     0  \n",
       "\n",
       "[5 rows x 225 columns]"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# path to where the data lies\n",
    "#dpath = './data/'\n",
    "test = pd.read_csv(\"RentListingInquries_FE_test.csv\")\n",
    "test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prepare the data: keep the listing ids for the output file and\n",
    "# drop that column from the frame passed to the model.\n",
    "test_id = test[\"listing_id\"]\n",
    "\n",
    "X_test = test.drop(columns=[\"listing_id\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### load 训练好的模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the pickled model from disk.\n",
    "import pickle\n",
    "\n",
    "# NOTE: `xgb` rebinds the `import xgboost as xgb` module alias from the imports\n",
    "# cell. The name is kept because the prediction cell below reads it.\n",
    "# NOTE(review): only unpickle files this notebook wrote itself.\n",
    "with open(\"xgb_model.pkl\", 'rb') as model_file:\n",
    "    xgb = pickle.load(model_file)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 在测试集上测试，并生成测试结果提交文件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict class probabilities for every test row.\n",
    "y_test_pred = xgb.predict_proba(X_test)\n",
    "\n",
    "# NOTE(review): the column order assumes class indices 0/1/2 correspond to\n",
    "# high/medium/low - confirm against the label encoding used for training.\n",
    "out_df1 = pd.DataFrame(y_test_pred, columns=[\"high\", \"medium\", \"low\"])\n",
    "\n",
    "# Put the listing ids next to the probabilities and write the result file.\n",
    "out_df = pd.concat([test_id, out_df1], axis=1)\n",
    "out_df.to_csv(\"xgb_Rent.csv\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
